package main

import (
	. "github.com/mmcloughlin/avo/build"
	. "github.com/mmcloughlin/avo/operand"
	. "github.com/mmcloughlin/avo/reg"
)

// genLt_F64 emits Lt_AVX2_F64, which writes y[i] < z[i] into x[i] for every
// i in [0, len(x)).
//
// Only len(x) is loaded; the emitted code never reads the lengths of y and z,
// so callers presumably guarantee both are at least as long as x.
//
// The kernel has two phases: a vector loop that handles 16 element pairs per
// iteration (entered only when len(x) >= 16) and a scalar loop for whatever
// remains. Both phases produce false when either operand is NaN.
func genLt_F64() {
	const (
		chunk     = 16 // elements consumed by one vector iteration
		predicate = 1  // VCMPPD predicate LT_OS: less-than, false if unordered
	)

	// 16-byte AND mask with 0x01 in the low four bytes and zero elsewhere. It
	// turns the 0xFF/0x00 bytes left by the pack sequence into 0/1 bools and
	// clears the bytes above the four results.
	data := GLOBL("dataLtF64", RODATA|NOPTR)
	for off := 0; off < 16; off++ {
		var b uint8
		if off < 4 {
			b = 1
		}
		DATA(off, U8(b))
	}

	TEXT("Lt_AVX2_F64", NOSPLIT, "func(x []bool, y, z []float64)")
	Pragma("noescape")
	Load(Param("x").Base(), RDI)
	Load(Param("y").Base(), RSI)
	Load(Param("z").Base(), RDX)
	Load(Param("x").Len(), RCX)

	// RAX indexes the vector loop, R8 indexes the scalar tail.
	yVec := Mem{Base: RSI}.Idx(RAX, 8)
	zVec := Mem{Base: RDX}.Idx(RAX, 8)
	xVec := Mem{Base: RDI}.Idx(RAX, 1)
	yTail := Mem{Base: RSI}.Idx(R8, 8)
	zTail := Mem{Base: RDX}.Idx(R8, 8)
	xTail := Mem{Base: RDI}.Idx(R8, 1)

	// Nothing to do for an empty x; short inputs go straight to the tail.
	TESTQ(RCX, RCX)
	JE(LabelRef("LBB0_7"))
	CMPQ(RCX, Imm(chunk))
	JAE(LabelRef("LBB0_3"))
	XORL(R8L, R8L)
	JMP(LabelRef("LBB0_6"))

	// Vector setup: R8 = len(x) rounded down to a multiple of chunk.
	Label("LBB0_3")
	MOVQ(RCX, R8)
	ANDQ(I32(-chunk), R8)
	XORL(EAX, EAX)
	VMOVDQU(data.Offset(0), X0)

	// Vector loop: four YMM compares, each narrowed from 64-bit lane masks to
	// four 0/1 bytes, then merged into one 16-byte store.
	Label("LBB0_4")
	VMOVUPD(yVec, Y1)
	VMOVUPD(yVec.Offset(32), Y2)
	VMOVUPD(yVec.Offset(64), Y3)
	VMOVUPD(yVec.Offset(96), Y4)
	VCMPPD(Imm(predicate), zVec, Y1, Y1)
	VEXTRACTF128(Imm(1), Y1, X5)
	VPACKSSDW(X5, X1, X1)
	VPACKSSDW(X1, X1, X1)
	VPACKSSWB(X1, X1, X1)
	VCMPPD(Imm(predicate), zVec.Offset(32), Y2, Y2)
	VPAND(X0, X1, X1)
	VEXTRACTF128(Imm(1), Y2, X5)
	VPACKSSDW(X5, X2, X2)
	VPACKSSDW(X2, X2, X2)
	VPACKSSWB(X2, X2, X2)
	VPAND(X0, X2, X2)
	VCMPPD(Imm(predicate), zVec.Offset(64), Y3, Y3)
	VPUNPCKLDQ(X2, X1, X1)
	VEXTRACTF128(Imm(1), Y3, X2)
	VPACKSSDW(X2, X3, X2)
	VPACKSSDW(X2, X2, X2)
	VPACKSSWB(X2, X2, X2)
	VPAND(X0, X2, X2)
	VCMPPD(Imm(predicate), zVec.Offset(96), Y4, Y3)
	VEXTRACTF128(Imm(1), Y3, X4)
	VPACKSSDW(X4, X3, X3)
	VPACKSSDW(X3, X3, X3)
	VPACKSSWB(X3, X3, X3)
	VPAND(X0, X3, X3)
	VPBROADCASTD(X3, X3)
	VPBROADCASTD(X2, X2)
	VPUNPCKLDQ(X3, X2, X2)
	VPBLENDD(Imm(12), X2, X1, X1)
	VMOVDQU(X1, xVec)
	ADDQ(Imm(chunk), RAX)
	CMPQ(R8, RAX)
	JNE(LabelRef("LBB0_4"))
	CMPQ(R8, RCX)
	JE(LabelRef("LBB0_7"))

	// Scalar tail. z[i] is loaded and compared against y[i] so that SETHI
	// (CF == 0 and ZF == 0) means z[i] > y[i]. An unordered compare sets both
	// CF and ZF, so NaN operands store false, in agreement with the vector
	// loop. Comparing y against z with SETCS would store true for NaN.
	Label("LBB0_6")
	VMOVSD(zTail, X0)
	VUCOMISD(yTail, X0)
	SETHI(xTail)
	ADDQ(Imm(1), R8)
	CMPQ(RCX, R8)
	JNE(LabelRef("LBB0_6"))

	Label("LBB0_7")
	VZEROUPPER()
	RET()
}

// genLt_F32 emits Lt_AVX2_F32, which writes y[i] < z[i] into x[i] for every
// i in [0, len(x)).
//
// Only len(x) is loaded; the emitted code never reads the lengths of y and z,
// so callers presumably guarantee both are at least as long as x.
//
// The kernel has two phases: a vector loop that handles 32 element pairs per
// iteration (entered only when len(x) >= 32) and a scalar loop for whatever
// remains. Both phases produce false when either operand is NaN.
func genLt_F32() {
	const (
		chunk     = 32 // elements consumed by one vector iteration
		predicate = 1  // VCMPPS predicate LT_OS: less-than, false if unordered
	)

	// 16-byte AND mask with 0x01 in the low eight bytes and zero elsewhere. It
	// turns the 0xFF/0x00 bytes left by the pack sequence into 0/1 bools and
	// clears the bytes above the eight results.
	data := GLOBL("dataLtF32", RODATA|NOPTR)
	for off := 0; off < 16; off++ {
		var b uint8
		if off < 8 {
			b = 1
		}
		DATA(off, U8(b))
	}

	TEXT("Lt_AVX2_F32", NOSPLIT, "func(x []bool, y, z []float32)")
	Pragma("noescape")
	Load(Param("x").Base(), RDI)
	Load(Param("y").Base(), RSI)
	Load(Param("z").Base(), RDX)
	Load(Param("x").Len(), RCX)

	// RAX indexes the vector loop, R8 indexes the scalar tail.
	yVec := Mem{Base: RSI}.Idx(RAX, 4)
	zVec := Mem{Base: RDX}.Idx(RAX, 4)
	xVec := Mem{Base: RDI}.Idx(RAX, 1)
	yTail := Mem{Base: RSI}.Idx(R8, 4)
	zTail := Mem{Base: RDX}.Idx(R8, 4)
	xTail := Mem{Base: RDI}.Idx(R8, 1)

	// Nothing to do for an empty x; short inputs go straight to the tail.
	TESTQ(RCX, RCX)
	JE(LabelRef("LBB1_7"))
	CMPQ(RCX, Imm(chunk))
	JAE(LabelRef("LBB1_3"))
	XORL(R8L, R8L)
	JMP(LabelRef("LBB1_6"))

	// Vector setup: R8 = len(x) rounded down to a multiple of chunk.
	Label("LBB1_3")
	MOVQ(RCX, R8)
	ANDQ(I32(-chunk), R8)
	XORL(EAX, EAX)
	VMOVDQU(data.Offset(0), X0)

	// Vector loop: four YMM compares, each narrowed from 32-bit lane masks to
	// eight 0/1 bytes, then merged into one 32-byte store.
	Label("LBB1_4")
	VMOVUPS(yVec, Y1)
	VMOVUPS(yVec.Offset(32), Y2)
	VMOVUPS(yVec.Offset(64), Y3)
	VMOVUPS(yVec.Offset(96), Y4)
	VCMPPS(Imm(predicate), zVec, Y1, Y1)
	VEXTRACTF128(Imm(1), Y1, X5)
	VPACKSSDW(X5, X1, X1)
	VPACKSSWB(X1, X1, X1)
	VCMPPS(Imm(predicate), zVec.Offset(32), Y2, Y2)
	VPAND(X0, X1, X1)
	VEXTRACTF128(Imm(1), Y2, X5)
	VPACKSSDW(X5, X2, X2)
	VPACKSSWB(X2, X2, X2)
	VPAND(X0, X2, X2)
	VCMPPS(Imm(predicate), zVec.Offset(64), Y3, Y3)
	VEXTRACTF128(Imm(1), Y3, X5)
	VPACKSSDW(X5, X3, X3)
	VPACKSSWB(X3, X3, X3)
	VCMPPS(Imm(predicate), zVec.Offset(96), Y4, Y4)
	VPAND(X0, X3, X3)
	VEXTRACTF128(Imm(1), Y4, X5)
	VPACKSSDW(X5, X4, X4)
	VPACKSSWB(X4, X4, X4)
	VPAND(X0, X4, X4)
	VINSERTI128(Imm(1), X4, Y3, Y3)
	VINSERTI128(Imm(1), X2, Y1, Y1)
	VPUNPCKLQDQ(Y3, Y1, Y1)
	VPERMQ(Imm(216), Y1, Y1)
	VMOVDQU(Y1, xVec)
	ADDQ(Imm(chunk), RAX)
	CMPQ(R8, RAX)
	JNE(LabelRef("LBB1_4"))
	CMPQ(R8, RCX)
	JE(LabelRef("LBB1_7"))

	// Scalar tail. z[i] is loaded and compared against y[i] so that SETHI
	// (CF == 0 and ZF == 0) means z[i] > y[i]. An unordered compare sets both
	// CF and ZF, so NaN operands store false, in agreement with the vector
	// loop. Comparing y against z with SETCS would store true for NaN.
	Label("LBB1_6")
	VMOVSS(zTail, X0)
	VUCOMISS(yTail, X0)
	SETHI(xTail)
	ADDQ(Imm(1), R8)
	CMPQ(RCX, R8)
	JNE(LabelRef("LBB1_6"))

	Label("LBB1_7")
	VZEROUPPER()
	RET()
}

// genLte_F64 emits Lte_AVX2_F64, which writes y[i] <= z[i] into x[i] for
// every i in [0, len(x)).
//
// Only len(x) is loaded; the emitted code never reads the lengths of y and z,
// so callers presumably guarantee both are at least as long as x.
//
// The kernel has two phases: a vector loop that handles 16 element pairs per
// iteration (entered only when len(x) >= 16) and a scalar loop for whatever
// remains. Both phases produce false when either operand is NaN.
func genLte_F64() {
	const (
		chunk     = 16 // elements consumed by one vector iteration
		predicate = 2  // VCMPPD predicate LE_OS: less-or-equal, false if unordered
	)

	// 16-byte AND mask with 0x01 in the low four bytes and zero elsewhere. It
	// turns the 0xFF/0x00 bytes left by the pack sequence into 0/1 bools and
	// clears the bytes above the four results.
	data := GLOBL("dataLteF64", RODATA|NOPTR)
	for off := 0; off < 16; off++ {
		var b uint8
		if off < 4 {
			b = 1
		}
		DATA(off, U8(b))
	}

	TEXT("Lte_AVX2_F64", NOSPLIT, "func(x []bool, y, z []float64)")
	Pragma("noescape")
	Load(Param("x").Base(), RDI)
	Load(Param("y").Base(), RSI)
	Load(Param("z").Base(), RDX)
	Load(Param("x").Len(), RCX)

	// RAX indexes the vector loop, R8 indexes the scalar tail.
	yVec := Mem{Base: RSI}.Idx(RAX, 8)
	zVec := Mem{Base: RDX}.Idx(RAX, 8)
	xVec := Mem{Base: RDI}.Idx(RAX, 1)
	yTail := Mem{Base: RSI}.Idx(R8, 8)
	zTail := Mem{Base: RDX}.Idx(R8, 8)
	xTail := Mem{Base: RDI}.Idx(R8, 1)

	// Nothing to do for an empty x; short inputs go straight to the tail.
	TESTQ(RCX, RCX)
	JE(LabelRef("LBB2_7"))
	CMPQ(RCX, Imm(chunk))
	JAE(LabelRef("LBB2_3"))
	XORL(R8L, R8L)
	JMP(LabelRef("LBB2_6"))

	// Vector setup: R8 = len(x) rounded down to a multiple of chunk.
	Label("LBB2_3")
	MOVQ(RCX, R8)
	ANDQ(I32(-chunk), R8)
	XORL(EAX, EAX)
	VMOVDQU(data.Offset(0), X0)

	// Vector loop: four YMM compares, each narrowed from 64-bit lane masks to
	// four 0/1 bytes, then merged into one 16-byte store.
	Label("LBB2_4")
	VMOVUPD(yVec, Y1)
	VMOVUPD(yVec.Offset(32), Y2)
	VMOVUPD(yVec.Offset(64), Y3)
	VMOVUPD(yVec.Offset(96), Y4)
	VCMPPD(Imm(predicate), zVec, Y1, Y1)
	VEXTRACTF128(Imm(1), Y1, X5)
	VPACKSSDW(X5, X1, X1)
	VPACKSSDW(X1, X1, X1)
	VPACKSSWB(X1, X1, X1)
	VCMPPD(Imm(predicate), zVec.Offset(32), Y2, Y2)
	VPAND(X0, X1, X1)
	VEXTRACTF128(Imm(1), Y2, X5)
	VPACKSSDW(X5, X2, X2)
	VPACKSSDW(X2, X2, X2)
	VPACKSSWB(X2, X2, X2)
	VPAND(X0, X2, X2)
	VCMPPD(Imm(predicate), zVec.Offset(64), Y3, Y3)
	VPUNPCKLDQ(X2, X1, X1)
	VEXTRACTF128(Imm(1), Y3, X2)
	VPACKSSDW(X2, X3, X2)
	VPACKSSDW(X2, X2, X2)
	VPACKSSWB(X2, X2, X2)
	VPAND(X0, X2, X2)
	VCMPPD(Imm(predicate), zVec.Offset(96), Y4, Y3)
	VEXTRACTF128(Imm(1), Y3, X4)
	VPACKSSDW(X4, X3, X3)
	VPACKSSDW(X3, X3, X3)
	VPACKSSWB(X3, X3, X3)
	VPAND(X0, X3, X3)
	VPBROADCASTD(X3, X3)
	VPBROADCASTD(X2, X2)
	VPUNPCKLDQ(X3, X2, X2)
	VPBLENDD(Imm(12), X2, X1, X1)
	VMOVDQU(X1, xVec)
	ADDQ(Imm(chunk), RAX)
	CMPQ(R8, RAX)
	JNE(LabelRef("LBB2_4"))
	CMPQ(R8, RCX)
	JE(LabelRef("LBB2_7"))

	// Scalar tail. z[i] is loaded and compared against y[i] so that SETCC
	// (CF == 0) means z[i] >= y[i]. An unordered compare sets CF, so NaN
	// operands store false, in agreement with the vector loop. Comparing y
	// against z with SETLS would store true for NaN.
	Label("LBB2_6")
	VMOVSD(zTail, X0)
	VUCOMISD(yTail, X0)
	SETCC(xTail)
	ADDQ(Imm(1), R8)
	CMPQ(RCX, R8)
	JNE(LabelRef("LBB2_6"))

	Label("LBB2_7")
	VZEROUPPER()
	RET()
}

// genLte_F32 emits Lte_AVX2_F32, which writes y[i] <= z[i] into x[i] for
// every i in [0, len(x)).
//
// Only len(x) is loaded; the emitted code never reads the lengths of y and z,
// so callers presumably guarantee both are at least as long as x.
//
// The kernel has two phases: a vector loop that handles 32 element pairs per
// iteration (entered only when len(x) >= 32) and a scalar loop for whatever
// remains. Both phases produce false when either operand is NaN.
func genLte_F32() {
	const (
		chunk     = 32 // elements consumed by one vector iteration
		predicate = 2  // VCMPPS predicate LE_OS: less-or-equal, false if unordered
	)

	// 16-byte AND mask with 0x01 in the low eight bytes and zero elsewhere. It
	// turns the 0xFF/0x00 bytes left by the pack sequence into 0/1 bools and
	// clears the bytes above the eight results.
	data := GLOBL("dataLteF32", RODATA|NOPTR)
	for off := 0; off < 16; off++ {
		var b uint8
		if off < 8 {
			b = 1
		}
		DATA(off, U8(b))
	}

	TEXT("Lte_AVX2_F32", NOSPLIT, "func(x []bool, y, z []float32)")
	Pragma("noescape")
	Load(Param("x").Base(), RDI)
	Load(Param("y").Base(), RSI)
	Load(Param("z").Base(), RDX)
	Load(Param("x").Len(), RCX)

	// RAX indexes the vector loop, R8 indexes the scalar tail.
	yVec := Mem{Base: RSI}.Idx(RAX, 4)
	zVec := Mem{Base: RDX}.Idx(RAX, 4)
	xVec := Mem{Base: RDI}.Idx(RAX, 1)
	yTail := Mem{Base: RSI}.Idx(R8, 4)
	zTail := Mem{Base: RDX}.Idx(R8, 4)
	xTail := Mem{Base: RDI}.Idx(R8, 1)

	// Nothing to do for an empty x; short inputs go straight to the tail.
	TESTQ(RCX, RCX)
	JE(LabelRef("LBB3_7"))
	CMPQ(RCX, Imm(chunk))
	JAE(LabelRef("LBB3_3"))
	XORL(R8L, R8L)
	JMP(LabelRef("LBB3_6"))

	// Vector setup: R8 = len(x) rounded down to a multiple of chunk.
	Label("LBB3_3")
	MOVQ(RCX, R8)
	ANDQ(I32(-chunk), R8)
	XORL(EAX, EAX)
	VMOVDQU(data.Offset(0), X0)

	// Vector loop: four YMM compares, each narrowed from 32-bit lane masks to
	// eight 0/1 bytes, then merged into one 32-byte store.
	Label("LBB3_4")
	VMOVUPS(yVec, Y1)
	VMOVUPS(yVec.Offset(32), Y2)
	VMOVUPS(yVec.Offset(64), Y3)
	VMOVUPS(yVec.Offset(96), Y4)
	VCMPPS(Imm(predicate), zVec, Y1, Y1)
	VEXTRACTF128(Imm(1), Y1, X5)
	VPACKSSDW(X5, X1, X1)
	VPACKSSWB(X1, X1, X1)
	VCMPPS(Imm(predicate), zVec.Offset(32), Y2, Y2)
	VPAND(X0, X1, X1)
	VEXTRACTF128(Imm(1), Y2, X5)
	VPACKSSDW(X5, X2, X2)
	VPACKSSWB(X2, X2, X2)
	VPAND(X0, X2, X2)
	VCMPPS(Imm(predicate), zVec.Offset(64), Y3, Y3)
	VEXTRACTF128(Imm(1), Y3, X5)
	VPACKSSDW(X5, X3, X3)
	VPACKSSWB(X3, X3, X3)
	VCMPPS(Imm(predicate), zVec.Offset(96), Y4, Y4)
	VPAND(X0, X3, X3)
	VEXTRACTF128(Imm(1), Y4, X5)
	VPACKSSDW(X5, X4, X4)
	VPACKSSWB(X4, X4, X4)
	VPAND(X0, X4, X4)
	VINSERTI128(Imm(1), X4, Y3, Y3)
	VINSERTI128(Imm(1), X2, Y1, Y1)
	VPUNPCKLQDQ(Y3, Y1, Y1)
	VPERMQ(Imm(216), Y1, Y1)
	VMOVDQU(Y1, xVec)
	ADDQ(Imm(chunk), RAX)
	CMPQ(R8, RAX)
	JNE(LabelRef("LBB3_4"))
	CMPQ(R8, RCX)
	JE(LabelRef("LBB3_7"))

	// Scalar tail. z[i] is loaded and compared against y[i] so that SETCC
	// (CF == 0) means z[i] >= y[i]. An unordered compare sets CF, so NaN
	// operands store false, in agreement with the vector loop. Comparing y
	// against z with SETLS would store true for NaN.
	Label("LBB3_6")
	VMOVSS(zTail, X0)
	VUCOMISS(yTail, X0)
	SETCC(xTail)
	ADDQ(Imm(1), R8)
	CMPQ(RCX, R8)
	JNE(LabelRef("LBB3_6"))

	Label("LBB3_7")
	VZEROUPPER()
	RET()
}

// genGt_F64 emits Gt_AVX2_F64, which writes y[i] > z[i] into x[i] for every
// i in [0, len(x)).
//
// Only len(x) is loaded; the emitted code never reads the lengths of y and z,
// so callers presumably guarantee both are at least as long as x.
//
// The kernel has two phases: a vector loop that handles 16 element pairs per
// iteration (entered only when len(x) >= 16) and a scalar loop for whatever
// remains.
func genGt_F64() {
	const (
		chunk     = 16 // elements consumed by one vector iteration
		predicate = 1  // VCMPPD predicate LT_OS, applied as z < y
	)

	// 16-byte AND mask with 0x01 in the low four bytes and zero elsewhere. It
	// turns the 0xFF/0x00 bytes left by the pack sequence into 0/1 bools and
	// clears the bytes above the four results.
	data := GLOBL("dataGtF64", RODATA|NOPTR)
	for off := 0; off < 16; off++ {
		var b uint8
		if off < 4 {
			b = 1
		}
		DATA(off, U8(b))
	}

	TEXT("Gt_AVX2_F64", NOSPLIT, "func(x []bool, y, z []float64)")
	Pragma("noescape")
	Load(Param("x").Base(), RDI)
	Load(Param("y").Base(), RSI)
	Load(Param("z").Base(), RDX)
	Load(Param("x").Len(), RCX)

	// RAX indexes the vector loop, R8 indexes the scalar tail.
	yVec := Mem{Base: RSI}.Idx(RAX, 8)
	zVec := Mem{Base: RDX}.Idx(RAX, 8)
	xVec := Mem{Base: RDI}.Idx(RAX, 1)
	yTail := Mem{Base: RSI}.Idx(R8, 8)
	zTail := Mem{Base: RDX}.Idx(R8, 8)
	xTail := Mem{Base: RDI}.Idx(R8, 1)

	// Nothing to do for an empty x; short inputs go straight to the tail.
	TESTQ(RCX, RCX)
	JE(LabelRef("LBB4_7"))
	CMPQ(RCX, Imm(chunk))
	JAE(LabelRef("LBB4_3"))
	XORL(R8L, R8L)
	JMP(LabelRef("LBB4_6"))

	// Vector setup: R8 = len(x) rounded down to a multiple of chunk.
	Label("LBB4_3")
	MOVQ(RCX, R8)
	ANDQ(I32(-chunk), R8)
	XORL(EAX, EAX)
	VMOVDQU(data.Offset(0), X0)

	// Vector loop: z is loaded and compared against y, so the less-than
	// predicate expresses y > z. Each compare is narrowed from 64-bit lane
	// masks to four 0/1 bytes and the four results share one 16-byte store.
	Label("LBB4_4")
	VMOVUPD(zVec, Y1)
	VMOVUPD(zVec.Offset(32), Y2)
	VMOVUPD(zVec.Offset(64), Y3)
	VMOVUPD(zVec.Offset(96), Y4)
	VCMPPD(Imm(predicate), yVec, Y1, Y1)
	VEXTRACTF128(Imm(1), Y1, X5)
	VPACKSSDW(X5, X1, X1)
	VPACKSSDW(X1, X1, X1)
	VPACKSSWB(X1, X1, X1)
	VCMPPD(Imm(predicate), yVec.Offset(32), Y2, Y2)
	VPAND(X0, X1, X1)
	VEXTRACTF128(Imm(1), Y2, X5)
	VPACKSSDW(X5, X2, X2)
	VPACKSSDW(X2, X2, X2)
	VPACKSSWB(X2, X2, X2)
	VPAND(X0, X2, X2)
	VCMPPD(Imm(predicate), yVec.Offset(64), Y3, Y3)
	VPUNPCKLDQ(X2, X1, X1)
	VEXTRACTF128(Imm(1), Y3, X2)
	VPACKSSDW(X2, X3, X2)
	VPACKSSDW(X2, X2, X2)
	VPACKSSWB(X2, X2, X2)
	VPAND(X0, X2, X2)
	VCMPPD(Imm(predicate), yVec.Offset(96), Y4, Y3)
	VEXTRACTF128(Imm(1), Y3, X4)
	VPACKSSDW(X4, X3, X3)
	VPACKSSDW(X3, X3, X3)
	VPACKSSWB(X3, X3, X3)
	VPAND(X0, X3, X3)
	VPBROADCASTD(X3, X3)
	VPBROADCASTD(X2, X2)
	VPUNPCKLDQ(X3, X2, X2)
	VPBLENDD(Imm(12), X2, X1, X1)
	VMOVDQU(X1, xVec)
	ADDQ(Imm(chunk), RAX)
	CMPQ(R8, RAX)
	JNE(LabelRef("LBB4_4"))
	CMPQ(R8, RCX)
	JE(LabelRef("LBB4_7"))

	// Scalar tail: compare y[i] against z[i]; SETHI stores 1 only when the
	// compare reports "above" (CF == 0 and ZF == 0).
	Label("LBB4_6")
	VMOVSD(yTail, X0)
	VUCOMISD(zTail, X0)
	SETHI(xTail)
	ADDQ(Imm(1), R8)
	CMPQ(RCX, R8)
	JNE(LabelRef("LBB4_6"))

	Label("LBB4_7")
	VZEROUPPER()
	RET()
}

// genGt_F32 emits Gt_AVX2_F32, which writes y[i] > z[i] into x[i] for every
// i in [0, len(x)).
//
// Only len(x) is loaded; the emitted code never reads the lengths of y and z,
// so callers presumably guarantee both are at least as long as x.
//
// The kernel has two phases: a vector loop that handles 32 element pairs per
// iteration (entered only when len(x) >= 32) and a scalar loop for whatever
// remains.
func genGt_F32() {
	const (
		chunk     = 32 // elements consumed by one vector iteration
		predicate = 1  // VCMPPS predicate LT_OS, applied as z < y
	)

	// 16-byte AND mask with 0x01 in the low eight bytes and zero elsewhere. It
	// turns the 0xFF/0x00 bytes left by the pack sequence into 0/1 bools and
	// clears the bytes above the eight results.
	data := GLOBL("dataGtF32", RODATA|NOPTR)
	for off := 0; off < 16; off++ {
		var b uint8
		if off < 8 {
			b = 1
		}
		DATA(off, U8(b))
	}

	TEXT("Gt_AVX2_F32", NOSPLIT, "func(x []bool, y, z []float32)")
	Pragma("noescape")
	Load(Param("x").Base(), RDI)
	Load(Param("y").Base(), RSI)
	Load(Param("z").Base(), RDX)
	Load(Param("x").Len(), RCX)

	// RAX indexes the vector loop, R8 indexes the scalar tail.
	yVec := Mem{Base: RSI}.Idx(RAX, 4)
	zVec := Mem{Base: RDX}.Idx(RAX, 4)
	xVec := Mem{Base: RDI}.Idx(RAX, 1)
	yTail := Mem{Base: RSI}.Idx(R8, 4)
	zTail := Mem{Base: RDX}.Idx(R8, 4)
	xTail := Mem{Base: RDI}.Idx(R8, 1)

	// Nothing to do for an empty x; short inputs go straight to the tail.
	TESTQ(RCX, RCX)
	JE(LabelRef("LBB5_7"))
	CMPQ(RCX, Imm(chunk))
	JAE(LabelRef("LBB5_3"))
	XORL(R8L, R8L)
	JMP(LabelRef("LBB5_6"))

	// Vector setup: R8 = len(x) rounded down to a multiple of chunk.
	Label("LBB5_3")
	MOVQ(RCX, R8)
	ANDQ(I32(-chunk), R8)
	XORL(EAX, EAX)
	VMOVDQU(data.Offset(0), X0)

	// Vector loop: z is loaded and compared against y, so the less-than
	// predicate expresses y > z. Each compare is narrowed from 32-bit lane
	// masks to eight 0/1 bytes and the four results share one 32-byte store.
	Label("LBB5_4")
	VMOVUPS(zVec, Y1)
	VMOVUPS(zVec.Offset(32), Y2)
	VMOVUPS(zVec.Offset(64), Y3)
	VMOVUPS(zVec.Offset(96), Y4)
	VCMPPS(Imm(predicate), yVec, Y1, Y1)
	VEXTRACTF128(Imm(1), Y1, X5)
	VPACKSSDW(X5, X1, X1)
	VPACKSSWB(X1, X1, X1)
	VCMPPS(Imm(predicate), yVec.Offset(32), Y2, Y2)
	VPAND(X0, X1, X1)
	VEXTRACTF128(Imm(1), Y2, X5)
	VPACKSSDW(X5, X2, X2)
	VPACKSSWB(X2, X2, X2)
	VPAND(X0, X2, X2)
	VCMPPS(Imm(predicate), yVec.Offset(64), Y3, Y3)
	VEXTRACTF128(Imm(1), Y3, X5)
	VPACKSSDW(X5, X3, X3)
	VPACKSSWB(X3, X3, X3)
	VCMPPS(Imm(predicate), yVec.Offset(96), Y4, Y4)
	VPAND(X0, X3, X3)
	VEXTRACTF128(Imm(1), Y4, X5)
	VPACKSSDW(X5, X4, X4)
	VPACKSSWB(X4, X4, X4)
	VPAND(X0, X4, X4)
	VINSERTI128(Imm(1), X4, Y3, Y3)
	VINSERTI128(Imm(1), X2, Y1, Y1)
	VPUNPCKLQDQ(Y3, Y1, Y1)
	VPERMQ(Imm(216), Y1, Y1)
	VMOVDQU(Y1, xVec)
	ADDQ(Imm(chunk), RAX)
	CMPQ(R8, RAX)
	JNE(LabelRef("LBB5_4"))
	CMPQ(R8, RCX)
	JE(LabelRef("LBB5_7"))

	// Scalar tail: compare y[i] against z[i]; SETHI stores 1 only when the
	// compare reports "above" (CF == 0 and ZF == 0).
	Label("LBB5_6")
	VMOVSS(yTail, X0)
	VUCOMISS(zTail, X0)
	SETHI(xTail)
	ADDQ(Imm(1), R8)
	CMPQ(RCX, R8)
	JNE(LabelRef("LBB5_6"))

	Label("LBB5_7")
	VZEROUPPER()
	RET()
}

// genGte_F64 emits Gte_AVX2_F64, which writes y[i] >= z[i] into x[i] for
// every i in [0, len(x)).
//
// Only len(x) is loaded; the emitted code never reads the lengths of y and z,
// so callers presumably guarantee both are at least as long as x.
//
// The kernel has two phases: a vector loop that handles 16 element pairs per
// iteration (entered only when len(x) >= 16) and a scalar loop for whatever
// remains.
func genGte_F64() {
	const (
		chunk     = 16 // elements consumed by one vector iteration
		predicate = 2  // VCMPPD predicate LE_OS, applied as z <= y
	)

	// 16-byte AND mask with 0x01 in the low four bytes and zero elsewhere. It
	// turns the 0xFF/0x00 bytes left by the pack sequence into 0/1 bools and
	// clears the bytes above the four results.
	data := GLOBL("dataGteF64", RODATA|NOPTR)
	for off := 0; off < 16; off++ {
		var b uint8
		if off < 4 {
			b = 1
		}
		DATA(off, U8(b))
	}

	TEXT("Gte_AVX2_F64", NOSPLIT, "func(x []bool, y, z []float64)")
	Pragma("noescape")
	Load(Param("x").Base(), RDI)
	Load(Param("y").Base(), RSI)
	Load(Param("z").Base(), RDX)
	Load(Param("x").Len(), RCX)

	// RAX indexes the vector loop, R8 indexes the scalar tail.
	yVec := Mem{Base: RSI}.Idx(RAX, 8)
	zVec := Mem{Base: RDX}.Idx(RAX, 8)
	xVec := Mem{Base: RDI}.Idx(RAX, 1)
	yTail := Mem{Base: RSI}.Idx(R8, 8)
	zTail := Mem{Base: RDX}.Idx(R8, 8)
	xTail := Mem{Base: RDI}.Idx(R8, 1)

	// Nothing to do for an empty x; short inputs go straight to the tail.
	TESTQ(RCX, RCX)
	JE(LabelRef("LBB6_7"))
	CMPQ(RCX, Imm(chunk))
	JAE(LabelRef("LBB6_3"))
	XORL(R8L, R8L)
	JMP(LabelRef("LBB6_6"))

	// Vector setup: R8 = len(x) rounded down to a multiple of chunk.
	Label("LBB6_3")
	MOVQ(RCX, R8)
	ANDQ(I32(-chunk), R8)
	XORL(EAX, EAX)
	VMOVDQU(data.Offset(0), X0)

	// Vector loop: z is loaded and compared against y, so the less-or-equal
	// predicate expresses y >= z. Each compare is narrowed from 64-bit lane
	// masks to four 0/1 bytes and the four results share one 16-byte store.
	Label("LBB6_4")
	VMOVUPD(zVec, Y1)
	VMOVUPD(zVec.Offset(32), Y2)
	VMOVUPD(zVec.Offset(64), Y3)
	VMOVUPD(zVec.Offset(96), Y4)
	VCMPPD(Imm(predicate), yVec, Y1, Y1)
	VEXTRACTF128(Imm(1), Y1, X5)
	VPACKSSDW(X5, X1, X1)
	VPACKSSDW(X1, X1, X1)
	VPACKSSWB(X1, X1, X1)
	VCMPPD(Imm(predicate), yVec.Offset(32), Y2, Y2)
	VPAND(X0, X1, X1)
	VEXTRACTF128(Imm(1), Y2, X5)
	VPACKSSDW(X5, X2, X2)
	VPACKSSDW(X2, X2, X2)
	VPACKSSWB(X2, X2, X2)
	VPAND(X0, X2, X2)
	VCMPPD(Imm(predicate), yVec.Offset(64), Y3, Y3)
	VPUNPCKLDQ(X2, X1, X1)
	VEXTRACTF128(Imm(1), Y3, X2)
	VPACKSSDW(X2, X3, X2)
	VPACKSSDW(X2, X2, X2)
	VPACKSSWB(X2, X2, X2)
	VPAND(X0, X2, X2)
	VCMPPD(Imm(predicate), yVec.Offset(96), Y4, Y3)
	VEXTRACTF128(Imm(1), Y3, X4)
	VPACKSSDW(X4, X3, X3)
	VPACKSSDW(X3, X3, X3)
	VPACKSSWB(X3, X3, X3)
	VPAND(X0, X3, X3)
	VPBROADCASTD(X3, X3)
	VPBROADCASTD(X2, X2)
	VPUNPCKLDQ(X3, X2, X2)
	VPBLENDD(Imm(12), X2, X1, X1)
	VMOVDQU(X1, xVec)
	ADDQ(Imm(chunk), RAX)
	CMPQ(R8, RAX)
	JNE(LabelRef("LBB6_4"))
	CMPQ(R8, RCX)
	JE(LabelRef("LBB6_7"))

	// Scalar tail: compare y[i] against z[i]; SETCC stores 1 only when the
	// compare leaves the carry flag clear ("above or equal").
	Label("LBB6_6")
	VMOVSD(yTail, X0)
	VUCOMISD(zTail, X0)
	SETCC(xTail)
	ADDQ(Imm(1), R8)
	CMPQ(RCX, R8)
	JNE(LabelRef("LBB6_6"))

	Label("LBB6_7")
	VZEROUPPER()
	RET()
}

// genGte_F32 emits Gte_AVX2_F32, which writes y[i] >= z[i] into x[i] for
// every i in [0, len(x)).
//
// Only len(x) is loaded; the emitted code never reads the lengths of y and z,
// so callers presumably guarantee both are at least as long as x.
//
// The kernel has two phases: a vector loop that handles 32 element pairs per
// iteration (entered only when len(x) >= 32) and a scalar loop for whatever
// remains.
func genGte_F32() {
	const (
		chunk     = 32 // elements consumed by one vector iteration
		predicate = 2  // VCMPPS predicate LE_OS, applied as z <= y
	)

	// 16-byte AND mask with 0x01 in the low eight bytes and zero elsewhere. It
	// turns the 0xFF/0x00 bytes left by the pack sequence into 0/1 bools and
	// clears the bytes above the eight results.
	data := GLOBL("dataGteF32", RODATA|NOPTR)
	for off := 0; off < 16; off++ {
		var b uint8
		if off < 8 {
			b = 1
		}
		DATA(off, U8(b))
	}

	TEXT("Gte_AVX2_F32", NOSPLIT, "func(x []bool, y, z []float32)")
	Pragma("noescape")
	Load(Param("x").Base(), RDI)
	Load(Param("y").Base(), RSI)
	Load(Param("z").Base(), RDX)
	Load(Param("x").Len(), RCX)

	// RAX indexes the vector loop, R8 indexes the scalar tail.
	yVec := Mem{Base: RSI}.Idx(RAX, 4)
	zVec := Mem{Base: RDX}.Idx(RAX, 4)
	xVec := Mem{Base: RDI}.Idx(RAX, 1)
	yTail := Mem{Base: RSI}.Idx(R8, 4)
	zTail := Mem{Base: RDX}.Idx(R8, 4)
	xTail := Mem{Base: RDI}.Idx(R8, 1)

	// Nothing to do for an empty x; short inputs go straight to the tail.
	TESTQ(RCX, RCX)
	JE(LabelRef("LBB7_7"))
	CMPQ(RCX, Imm(chunk))
	JAE(LabelRef("LBB7_3"))
	XORL(R8L, R8L)
	JMP(LabelRef("LBB7_6"))

	// Vector setup: R8 = len(x) rounded down to a multiple of chunk.
	Label("LBB7_3")
	MOVQ(RCX, R8)
	ANDQ(I32(-chunk), R8)
	XORL(EAX, EAX)
	VMOVDQU(data.Offset(0), X0)

	// Vector loop: z is loaded and compared against y, so the less-or-equal
	// predicate expresses y >= z. Each compare is narrowed from 32-bit lane
	// masks to eight 0/1 bytes and the four results share one 32-byte store.
	Label("LBB7_4")
	VMOVUPS(zVec, Y1)
	VMOVUPS(zVec.Offset(32), Y2)
	VMOVUPS(zVec.Offset(64), Y3)
	VMOVUPS(zVec.Offset(96), Y4)
	VCMPPS(Imm(predicate), yVec, Y1, Y1)
	VEXTRACTF128(Imm(1), Y1, X5)
	VPACKSSDW(X5, X1, X1)
	VPACKSSWB(X1, X1, X1)
	VCMPPS(Imm(predicate), yVec.Offset(32), Y2, Y2)
	VPAND(X0, X1, X1)
	VEXTRACTF128(Imm(1), Y2, X5)
	VPACKSSDW(X5, X2, X2)
	VPACKSSWB(X2, X2, X2)
	VPAND(X0, X2, X2)
	VCMPPS(Imm(predicate), yVec.Offset(64), Y3, Y3)
	VEXTRACTF128(Imm(1), Y3, X5)
	VPACKSSDW(X5, X3, X3)
	VPACKSSWB(X3, X3, X3)
	VCMPPS(Imm(predicate), yVec.Offset(96), Y4, Y4)
	VPAND(X0, X3, X3)
	VEXTRACTF128(Imm(1), Y4, X5)
	VPACKSSDW(X5, X4, X4)
	VPACKSSWB(X4, X4, X4)
	VPAND(X0, X4, X4)
	VINSERTI128(Imm(1), X4, Y3, Y3)
	VINSERTI128(Imm(1), X2, Y1, Y1)
	VPUNPCKLQDQ(Y3, Y1, Y1)
	VPERMQ(Imm(216), Y1, Y1)
	VMOVDQU(Y1, xVec)
	ADDQ(Imm(chunk), RAX)
	CMPQ(R8, RAX)
	JNE(LabelRef("LBB7_4"))
	CMPQ(R8, RCX)
	JE(LabelRef("LBB7_7"))

	// Scalar tail: compare y[i] against z[i]; SETCC stores 1 only when the
	// compare leaves the carry flag clear ("above or equal").
	Label("LBB7_6")
	VMOVSS(yTail, X0)
	VUCOMISS(zTail, X0)
	SETCC(xTail)
	ADDQ(Imm(1), R8)
	CMPQ(RCX, R8)
	JNE(LabelRef("LBB7_6"))

	Label("LBB7_7")
	VZEROUPPER()
	RET()
}

// genEq_F64 emits Eq_AVX2_F64, which writes y[i] == z[i] into x[i] for every
// i in [0, len(x)).
//
// Only len(x) is loaded; the emitted code never reads the lengths of y and z,
// so callers presumably guarantee both are at least as long as x.
//
// The kernel has two phases: a vector loop that handles 16 element pairs per
// iteration (entered only when len(x) >= 16) and a scalar loop for whatever
// remains. Both phases use the same compare predicate, so NaN never compares
// equal to anything regardless of where in the slice it sits.
func genEq_F64() {
	const (
		chunk     = 16 // elements consumed by one vector iteration
		predicate = 0  // VCMPPD/VCMPSD predicate EQ_OQ: equal, false if unordered
	)

	// 16-byte AND mask with 0x01 in the low four bytes and zero elsewhere. It
	// turns the 0xFF/0x00 bytes left by the pack sequence into 0/1 bools and
	// clears the bytes above the four results.
	data := GLOBL("dataEqF64", RODATA|NOPTR)
	for off := 0; off < 16; off++ {
		var b uint8
		if off < 4 {
			b = 1
		}
		DATA(off, U8(b))
	}

	TEXT("Eq_AVX2_F64", NOSPLIT, "func(x []bool, y, z []float64)")
	Pragma("noescape")
	Load(Param("x").Base(), RDI)
	Load(Param("y").Base(), RSI)
	Load(Param("z").Base(), RDX)
	Load(Param("x").Len(), RCX)

	// RAX indexes the vector loop, R8 indexes the scalar tail.
	yVec := Mem{Base: RSI}.Idx(RAX, 8)
	zVec := Mem{Base: RDX}.Idx(RAX, 8)
	xVec := Mem{Base: RDI}.Idx(RAX, 1)
	yTail := Mem{Base: RSI}.Idx(R8, 8)
	zTail := Mem{Base: RDX}.Idx(R8, 8)
	xTail := Mem{Base: RDI}.Idx(R8, 1)

	// Nothing to do for an empty x; short inputs go straight to the tail.
	TESTQ(RCX, RCX)
	JE(LabelRef("LBB8_7"))
	CMPQ(RCX, Imm(chunk))
	JAE(LabelRef("LBB8_3"))
	XORL(R8L, R8L)
	JMP(LabelRef("LBB8_6"))

	// Vector setup: R8 = len(x) rounded down to a multiple of chunk.
	Label("LBB8_3")
	MOVQ(RCX, R8)
	ANDQ(I32(-chunk), R8)
	XORL(EAX, EAX)
	VMOVDQU(data.Offset(0), X0)

	// Vector loop: four YMM compares, each narrowed from 64-bit lane masks to
	// four 0/1 bytes, then merged into one 16-byte store.
	Label("LBB8_4")
	VMOVUPD(zVec, Y1)
	VMOVUPD(zVec.Offset(32), Y2)
	VMOVUPD(zVec.Offset(64), Y3)
	VMOVUPD(zVec.Offset(96), Y4)
	VCMPPD(Imm(predicate), yVec, Y1, Y1)
	VEXTRACTF128(Imm(1), Y1, X5)
	VPACKSSDW(X5, X1, X1)
	VPACKSSDW(X1, X1, X1)
	VPACKSSWB(X1, X1, X1)
	VCMPPD(Imm(predicate), yVec.Offset(32), Y2, Y2)
	VPAND(X0, X1, X1)
	VEXTRACTF128(Imm(1), Y2, X5)
	VPACKSSDW(X5, X2, X2)
	VPACKSSDW(X2, X2, X2)
	VPACKSSWB(X2, X2, X2)
	VPAND(X0, X2, X2)
	VCMPPD(Imm(predicate), yVec.Offset(64), Y3, Y3)
	VPUNPCKLDQ(X2, X1, X1)
	VEXTRACTF128(Imm(1), Y3, X2)
	VPACKSSDW(X2, X3, X2)
	VPACKSSDW(X2, X2, X2)
	VPACKSSWB(X2, X2, X2)
	VPAND(X0, X2, X2)
	VCMPPD(Imm(predicate), yVec.Offset(96), Y4, Y3)
	VEXTRACTF128(Imm(1), Y3, X4)
	VPACKSSDW(X4, X3, X3)
	VPACKSSDW(X3, X3, X3)
	VPACKSSWB(X3, X3, X3)
	VPAND(X0, X3, X3)
	VPBROADCASTD(X3, X3)
	VPBROADCASTD(X2, X2)
	VPUNPCKLDQ(X3, X2, X2)
	VPBLENDD(Imm(12), X2, X1, X1)
	VMOVDQU(X1, xVec)
	ADDQ(Imm(chunk), RAX)
	CMPQ(R8, RAX)
	JNE(LabelRef("LBB8_4"))
	CMPQ(R8, RCX)
	JE(LabelRef("LBB8_7"))

	// Scalar tail. VUCOMISD sets ZF for unordered operands as well as equal
	// ones, so a plain SETEQ would store true for NaN and disagree with the
	// vector loop. VCMPSD with the same predicate yields an all-ones or
	// all-zero mask instead; its low bit is the bool to store. RAX is free
	// here because the vector loop never runs again once the tail starts.
	Label("LBB8_6")
	VMOVSD(yTail, X0)
	VCMPSD(Imm(predicate), zTail, X0, X0)
	VMOVD(X0, EAX)
	ANDB(Imm(1), AL)
	MOVB(AL, xTail)
	ADDQ(Imm(1), R8)
	CMPQ(RCX, R8)
	JNE(LabelRef("LBB8_6"))

	Label("LBB8_7")
	VZEROUPPER()
	RET()
}

// genEq_F32 emits Eq_AVX2_F32, which writes y[i] == z[i] into x[i] for every
// i in [0, len(x)).
//
// Only len(x) is loaded; the emitted code never reads the lengths of y and z,
// so callers presumably guarantee both are at least as long as x.
//
// The kernel has two phases: a vector loop that handles 32 element pairs per
// iteration (entered only when len(x) >= 32) and a scalar loop for whatever
// remains. Both phases use the same compare predicate, so NaN never compares
// equal to anything regardless of where in the slice it sits.
func genEq_F32() {
	const (
		chunk     = 32 // elements consumed by one vector iteration
		predicate = 0  // VCMPPS/VCMPSS predicate EQ_OQ: equal, false if unordered
	)

	// 16-byte AND mask with 0x01 in the low eight bytes and zero elsewhere. It
	// turns the 0xFF/0x00 bytes left by the pack sequence into 0/1 bools and
	// clears the bytes above the eight results.
	data := GLOBL("dataEqF32", RODATA|NOPTR)
	for off := 0; off < 16; off++ {
		var b uint8
		if off < 8 {
			b = 1
		}
		DATA(off, U8(b))
	}

	TEXT("Eq_AVX2_F32", NOSPLIT, "func(x []bool, y, z []float32)")
	Pragma("noescape")
	Load(Param("x").Base(), RDI)
	Load(Param("y").Base(), RSI)
	Load(Param("z").Base(), RDX)
	Load(Param("x").Len(), RCX)

	// RAX indexes the vector loop, R8 indexes the scalar tail.
	yVec := Mem{Base: RSI}.Idx(RAX, 4)
	zVec := Mem{Base: RDX}.Idx(RAX, 4)
	xVec := Mem{Base: RDI}.Idx(RAX, 1)
	yTail := Mem{Base: RSI}.Idx(R8, 4)
	zTail := Mem{Base: RDX}.Idx(R8, 4)
	xTail := Mem{Base: RDI}.Idx(R8, 1)

	// Nothing to do for an empty x; short inputs go straight to the tail.
	TESTQ(RCX, RCX)
	JE(LabelRef("LBB9_7"))
	CMPQ(RCX, Imm(chunk))
	JAE(LabelRef("LBB9_3"))
	XORL(R8L, R8L)
	JMP(LabelRef("LBB9_6"))

	// Vector setup: R8 = len(x) rounded down to a multiple of chunk.
	Label("LBB9_3")
	MOVQ(RCX, R8)
	ANDQ(I32(-chunk), R8)
	XORL(EAX, EAX)
	VMOVDQU(data.Offset(0), X0)

	// Vector loop: four YMM compares, each narrowed from 32-bit lane masks to
	// eight 0/1 bytes, then merged into one 32-byte store.
	Label("LBB9_4")
	VMOVUPS(zVec, Y1)
	VMOVUPS(zVec.Offset(32), Y2)
	VMOVUPS(zVec.Offset(64), Y3)
	VMOVUPS(zVec.Offset(96), Y4)
	VCMPPS(Imm(predicate), yVec, Y1, Y1)
	VEXTRACTF128(Imm(1), Y1, X5)
	VPACKSSDW(X5, X1, X1)
	VPACKSSWB(X1, X1, X1)
	VCMPPS(Imm(predicate), yVec.Offset(32), Y2, Y2)
	VPAND(X0, X1, X1)
	VEXTRACTF128(Imm(1), Y2, X5)
	VPACKSSDW(X5, X2, X2)
	VPACKSSWB(X2, X2, X2)
	VPAND(X0, X2, X2)
	VCMPPS(Imm(predicate), yVec.Offset(64), Y3, Y3)
	VEXTRACTF128(Imm(1), Y3, X5)
	VPACKSSDW(X5, X3, X3)
	VPACKSSWB(X3, X3, X3)
	VCMPPS(Imm(predicate), yVec.Offset(96), Y4, Y4)
	VPAND(X0, X3, X3)
	VEXTRACTF128(Imm(1), Y4, X5)
	VPACKSSDW(X5, X4, X4)
	VPACKSSWB(X4, X4, X4)
	VPAND(X0, X4, X4)
	VINSERTI128(Imm(1), X4, Y3, Y3)
	VINSERTI128(Imm(1), X2, Y1, Y1)
	VPUNPCKLQDQ(Y3, Y1, Y1)
	VPERMQ(Imm(216), Y1, Y1)
	VMOVDQU(Y1, xVec)
	ADDQ(Imm(chunk), RAX)
	CMPQ(R8, RAX)
	JNE(LabelRef("LBB9_4"))
	CMPQ(R8, RCX)
	JE(LabelRef("LBB9_7"))

	// Scalar tail. VUCOMISS sets ZF for unordered operands as well as equal
	// ones, so a plain SETEQ would store true for NaN and disagree with the
	// vector loop. VCMPSS with the same predicate yields an all-ones or
	// all-zero mask instead; its low bit is the bool to store. RAX is free
	// here because the vector loop never runs again once the tail starts.
	Label("LBB9_6")
	VMOVSS(yTail, X0)
	VCMPSS(Imm(predicate), zTail, X0, X0)
	VMOVD(X0, EAX)
	ANDB(Imm(1), AL)
	MOVB(AL, xTail)
	ADDQ(Imm(1), R8)
	CMPQ(RCX, R8)
	JNE(LabelRef("LBB9_6"))

	Label("LBB9_7")
	VZEROUPPER()
	RET()
}

// genNeq_F64 emits Neq_AVX2_F64, which writes y[i] != z[i] into x[i] for
// every i in [0, len(x)).
//
// Only len(x) is loaded; the emitted code never reads the lengths of y and z,
// so callers presumably guarantee both are at least as long as x.
//
// The kernel has two phases: a vector loop that handles 16 element pairs per
// iteration (entered only when len(x) >= 16) and a scalar loop for whatever
// remains. Both phases use the same compare predicate, so a NaN operand
// always yields true regardless of where in the slice it sits.
func genNeq_F64() {
	const (
		chunk     = 16 // elements consumed by one vector iteration
		predicate = 4  // VCMPPD/VCMPSD predicate NEQ_UQ: not-equal, true if unordered
	)

	// 16-byte AND mask with 0x01 in the low four bytes and zero elsewhere. It
	// turns the 0xFF/0x00 bytes left by the pack sequence into 0/1 bools and
	// clears the bytes above the four results.
	data := GLOBL("dataNeqF64", RODATA|NOPTR)
	for off := 0; off < 16; off++ {
		var b uint8
		if off < 4 {
			b = 1
		}
		DATA(off, U8(b))
	}

	TEXT("Neq_AVX2_F64", NOSPLIT, "func(x []bool, y, z []float64)")
	Pragma("noescape")
	Load(Param("x").Base(), RDI)
	Load(Param("y").Base(), RSI)
	Load(Param("z").Base(), RDX)
	Load(Param("x").Len(), RCX)

	// RAX indexes the vector loop, R8 indexes the scalar tail.
	yVec := Mem{Base: RSI}.Idx(RAX, 8)
	zVec := Mem{Base: RDX}.Idx(RAX, 8)
	xVec := Mem{Base: RDI}.Idx(RAX, 1)
	yTail := Mem{Base: RSI}.Idx(R8, 8)
	zTail := Mem{Base: RDX}.Idx(R8, 8)
	xTail := Mem{Base: RDI}.Idx(R8, 1)

	// Nothing to do for an empty x; short inputs go straight to the tail.
	TESTQ(RCX, RCX)
	JE(LabelRef("LBB10_7"))
	CMPQ(RCX, Imm(chunk))
	JAE(LabelRef("LBB10_3"))
	XORL(R8L, R8L)
	JMP(LabelRef("LBB10_6"))

	// Vector setup: R8 = len(x) rounded down to a multiple of chunk.
	Label("LBB10_3")
	MOVQ(RCX, R8)
	ANDQ(I32(-chunk), R8)
	XORL(EAX, EAX)
	VMOVDQU(data.Offset(0), X0)

	// Vector loop: four YMM compares, each narrowed from 64-bit lane masks to
	// four 0/1 bytes, then merged into one 16-byte store.
	Label("LBB10_4")
	VMOVUPD(zVec, Y1)
	VMOVUPD(zVec.Offset(32), Y2)
	VMOVUPD(zVec.Offset(64), Y3)
	VMOVUPD(zVec.Offset(96), Y4)
	VCMPPD(Imm(predicate), yVec, Y1, Y1)
	VEXTRACTF128(Imm(1), Y1, X5)
	VPACKSSDW(X5, X1, X1)
	VPACKSSDW(X1, X1, X1)
	VPACKSSWB(X1, X1, X1)
	VCMPPD(Imm(predicate), yVec.Offset(32), Y2, Y2)
	VPAND(X0, X1, X1)
	VEXTRACTF128(Imm(1), Y2, X5)
	VPACKSSDW(X5, X2, X2)
	VPACKSSDW(X2, X2, X2)
	VPACKSSWB(X2, X2, X2)
	VPAND(X0, X2, X2)
	VCMPPD(Imm(predicate), yVec.Offset(64), Y3, Y3)
	VPUNPCKLDQ(X2, X1, X1)
	VEXTRACTF128(Imm(1), Y3, X2)
	VPACKSSDW(X2, X3, X2)
	VPACKSSDW(X2, X2, X2)
	VPACKSSWB(X2, X2, X2)
	VPAND(X0, X2, X2)
	VCMPPD(Imm(predicate), yVec.Offset(96), Y4, Y3)
	VEXTRACTF128(Imm(1), Y3, X4)
	VPACKSSDW(X4, X3, X3)
	VPACKSSDW(X3, X3, X3)
	VPACKSSWB(X3, X3, X3)
	VPAND(X0, X3, X3)
	VPBROADCASTD(X3, X3)
	VPBROADCASTD(X2, X2)
	VPUNPCKLDQ(X3, X2, X2)
	VPBLENDD(Imm(12), X2, X1, X1)
	VMOVDQU(X1, xVec)
	ADDQ(Imm(chunk), RAX)
	CMPQ(R8, RAX)
	JNE(LabelRef("LBB10_4"))
	CMPQ(R8, RCX)
	JE(LabelRef("LBB10_7"))

	// Scalar tail. VUCOMISD sets ZF for unordered operands, so a plain SETNE
	// would store false for NaN and disagree with the vector loop. VCMPSD
	// with the same predicate yields an all-ones or all-zero mask instead;
	// its low bit is the bool to store. RAX is free here because the vector
	// loop never runs again once the tail starts.
	Label("LBB10_6")
	VMOVSD(yTail, X0)
	VCMPSD(Imm(predicate), zTail, X0, X0)
	VMOVD(X0, EAX)
	ANDB(Imm(1), AL)
	MOVB(AL, xTail)
	ADDQ(Imm(1), R8)
	CMPQ(RCX, R8)
	JNE(LabelRef("LBB10_6"))

	Label("LBB10_7")
	VZEROUPPER()
	RET()
}

// genNeq_F32 emits Neq_AVX2_F32, which writes y[i] != z[i] into x[i] for
// every i in [0, len(x)).
//
// Only len(x) is loaded; the emitted code never reads the lengths of y and z,
// so callers presumably guarantee both are at least as long as x.
//
// The kernel has two phases: a vector loop that handles 32 element pairs per
// iteration (entered only when len(x) >= 32) and a scalar loop for whatever
// remains. Both phases use the same compare predicate, so a NaN operand
// always yields true regardless of where in the slice it sits.
func genNeq_F32() {
	const (
		chunk     = 32 // elements consumed by one vector iteration
		predicate = 4  // VCMPPS/VCMPSS predicate NEQ_UQ: not-equal, true if unordered
	)

	// 16-byte AND mask with 0x01 in the low eight bytes and zero elsewhere. It
	// turns the 0xFF/0x00 bytes left by the pack sequence into 0/1 bools and
	// clears the bytes above the eight results.
	data := GLOBL("dataNeqF32", RODATA|NOPTR)
	for off := 0; off < 16; off++ {
		var b uint8
		if off < 8 {
			b = 1
		}
		DATA(off, U8(b))
	}

	TEXT("Neq_AVX2_F32", NOSPLIT, "func(x []bool, y, z []float32)")
	Pragma("noescape")
	Load(Param("x").Base(), RDI)
	Load(Param("y").Base(), RSI)
	Load(Param("z").Base(), RDX)
	Load(Param("x").Len(), RCX)

	// RAX indexes the vector loop, R8 indexes the scalar tail.
	yVec := Mem{Base: RSI}.Idx(RAX, 4)
	zVec := Mem{Base: RDX}.Idx(RAX, 4)
	xVec := Mem{Base: RDI}.Idx(RAX, 1)
	yTail := Mem{Base: RSI}.Idx(R8, 4)
	zTail := Mem{Base: RDX}.Idx(R8, 4)
	xTail := Mem{Base: RDI}.Idx(R8, 1)

	// Nothing to do for an empty x; short inputs go straight to the tail.
	TESTQ(RCX, RCX)
	JE(LabelRef("LBB11_7"))
	CMPQ(RCX, Imm(chunk))
	JAE(LabelRef("LBB11_3"))
	XORL(R8L, R8L)
	JMP(LabelRef("LBB11_6"))

	// Vector setup: R8 = len(x) rounded down to a multiple of chunk.
	Label("LBB11_3")
	MOVQ(RCX, R8)
	ANDQ(I32(-chunk), R8)
	XORL(EAX, EAX)
	VMOVDQU(data.Offset(0), X0)

	// Vector loop: four YMM compares, each narrowed from 32-bit lane masks to
	// eight 0/1 bytes, then merged into one 32-byte store.
	Label("LBB11_4")
	VMOVUPS(zVec, Y1)
	VMOVUPS(zVec.Offset(32), Y2)
	VMOVUPS(zVec.Offset(64), Y3)
	VMOVUPS(zVec.Offset(96), Y4)
	VCMPPS(Imm(predicate), yVec, Y1, Y1)
	VEXTRACTF128(Imm(1), Y1, X5)
	VPACKSSDW(X5, X1, X1)
	VPACKSSWB(X1, X1, X1)
	VCMPPS(Imm(predicate), yVec.Offset(32), Y2, Y2)
	VPAND(X0, X1, X1)
	VEXTRACTF128(Imm(1), Y2, X5)
	VPACKSSDW(X5, X2, X2)
	VPACKSSWB(X2, X2, X2)
	VPAND(X0, X2, X2)
	VCMPPS(Imm(predicate), yVec.Offset(64), Y3, Y3)
	VEXTRACTF128(Imm(1), Y3, X5)
	VPACKSSDW(X5, X3, X3)
	VPACKSSWB(X3, X3, X3)
	VCMPPS(Imm(predicate), yVec.Offset(96), Y4, Y4)
	VPAND(X0, X3, X3)
	VEXTRACTF128(Imm(1), Y4, X5)
	VPACKSSDW(X5, X4, X4)
	VPACKSSWB(X4, X4, X4)
	VPAND(X0, X4, X4)
	VINSERTI128(Imm(1), X4, Y3, Y3)
	VINSERTI128(Imm(1), X2, Y1, Y1)
	VPUNPCKLQDQ(Y3, Y1, Y1)
	VPERMQ(Imm(216), Y1, Y1)
	VMOVDQU(Y1, xVec)
	ADDQ(Imm(chunk), RAX)
	CMPQ(R8, RAX)
	JNE(LabelRef("LBB11_4"))
	CMPQ(R8, RCX)
	JE(LabelRef("LBB11_7"))

	// Scalar tail. VUCOMISS sets ZF for unordered operands, so a plain SETNE
	// would store false for NaN and disagree with the vector loop. VCMPSS
	// with the same predicate yields an all-ones or all-zero mask instead;
	// its low bit is the bool to store. RAX is free here because the vector
	// loop never runs again once the tail starts.
	Label("LBB11_6")
	VMOVSS(yTail, X0)
	VCMPSS(Imm(predicate), zTail, X0, X0)
	VMOVD(X0, EAX)
	ANDB(Imm(1), AL)
	MOVB(AL, xTail)
	ADDQ(Imm(1), R8)
	CMPQ(RCX, R8)
	JNE(LabelRef("LBB11_6"))

	Label("LBB11_7")
	VZEROUPPER()
	RET()
}

// genLtNumber_F64 emits LtNumber_AVX2_F64, which writes y[i] < a into x[i]
// for every i in [0, len(x)).
//
// Only len(x) is loaded; the emitted code never reads the length of y, so
// callers presumably guarantee it is at least as long as x.
//
// The kernel has two phases: a vector loop that handles 16 elements per
// iteration (entered only when len(x) >= 16) and a scalar loop for whatever
// remains.
func genLtNumber_F64() {
	const (
		chunk     = 16 // elements consumed by one vector iteration
		predicate = 1  // VCMPPD predicate LT_OS, applied as y < a
	)

	// 16-byte AND mask with 0x01 in the low four bytes and zero elsewhere. It
	// turns the 0xFF/0x00 bytes left by the pack sequence into 0/1 bools and
	// clears the bytes above the four results.
	data := GLOBL("dataLtNumberF64", RODATA|NOPTR)
	for off := 0; off < 16; off++ {
		var b uint8
		if off < 4 {
			b = 1
		}
		DATA(off, U8(b))
	}

	TEXT("LtNumber_AVX2_F64", NOSPLIT, "func(x []bool, y []float64, a float64)")
	Pragma("noescape")
	Load(Param("x").Base(), RDI)
	Load(Param("y").Base(), RSI)
	Load(Param("a"), X0)
	Load(Param("x").Len(), RDX)

	// RCX indexes the vector loop, RAX indexes the scalar tail.
	yVec := Mem{Base: RSI}.Idx(RCX, 8)
	xVec := Mem{Base: RDI}.Idx(RCX, 1)
	yTail := Mem{Base: RSI}.Idx(RAX, 8)
	xTail := Mem{Base: RDI}.Idx(RAX, 1)

	// Nothing to do for an empty x; short inputs go straight to the tail.
	TESTQ(RDX, RDX)
	JE(LabelRef("LBB12_7"))
	CMPQ(RDX, Imm(chunk))
	JAE(LabelRef("LBB12_3"))
	XORL(EAX, EAX)
	JMP(LabelRef("LBB12_6"))

	// Vector setup: RAX = len(x) rounded down to a multiple of chunk, Y1 = a
	// in every lane, X2 = byte mask. X0 keeps the scalar a for the tail.
	Label("LBB12_3")
	MOVQ(RDX, RAX)
	ANDQ(I32(-chunk), RAX)
	VBROADCASTSD(X0, Y1)
	XORL(ECX, ECX)
	VMOVDQU(data.Offset(0), X2)

	// Vector loop: four YMM compares against the broadcast value, each
	// narrowed from 64-bit lane masks to four 0/1 bytes, then merged into one
	// 16-byte store.
	Label("LBB12_4")
	VMOVUPD(yVec, Y3)
	VMOVUPD(yVec.Offset(32), Y4)
	VMOVUPD(yVec.Offset(64), Y5)
	VMOVUPD(yVec.Offset(96), Y6)
	VCMPPD(Imm(predicate), Y1, Y3, Y3)
	VEXTRACTF128(Imm(1), Y3, X7)
	VPACKSSDW(X7, X3, X3)
	VPACKSSDW(X3, X3, X3)
	VPACKSSWB(X3, X3, X3)
	VPAND(X2, X3, X3)
	VCMPPD(Imm(predicate), Y1, Y4, Y4)
	VEXTRACTF128(Imm(1), Y4, X7)
	VPACKSSDW(X7, X4, X4)
	VPACKSSDW(X4, X4, X4)
	VPACKSSWB(X4, X4, X4)
	VPAND(X2, X4, X4)
	VPUNPCKLDQ(X4, X3, X3)
	VCMPPD(Imm(predicate), Y1, Y5, Y4)
	VEXTRACTF128(Imm(1), Y4, X5)
	VPACKSSDW(X5, X4, X4)
	VPACKSSDW(X4, X4, X4)
	VPACKSSWB(X4, X4, X4)
	VPAND(X2, X4, X4)
	VCMPPD(Imm(predicate), Y1, Y6, Y5)
	VEXTRACTF128(Imm(1), Y5, X6)
	VPACKSSDW(X6, X5, X5)
	VPACKSSDW(X5, X5, X5)
	VPACKSSWB(X5, X5, X5)
	VPAND(X2, X5, X5)
	VPBROADCASTD(X5, X5)
	VPBROADCASTD(X4, X4)
	VPUNPCKLDQ(X5, X4, X4)
	VPBLENDD(Imm(12), X4, X3, X3)
	VMOVDQU(X3, xVec)
	ADDQ(Imm(chunk), RCX)
	CMPQ(RAX, RCX)
	JNE(LabelRef("LBB12_4"))
	CMPQ(RAX, RDX)
	JE(LabelRef("LBB12_7"))

	// Scalar tail: compare a against y[i]; SETHI stores 1 only when the
	// compare reports "above" (CF == 0 and ZF == 0), i.e. a > y[i].
	Label("LBB12_6")
	VUCOMISD(yTail, X0)
	SETHI(xTail)
	ADDQ(Imm(1), RAX)
	CMPQ(RDX, RAX)
	JNE(LabelRef("LBB12_6"))

	Label("LBB12_7")
	VZEROUPPER()
	RET()
}

// genLtNumber_F32 emits LtNumber_AVX2_F32, which writes y[i] < a into x[i]
// for every i in [0, len(x)).
//
// Only len(x) is loaded; the emitted code never reads the length of y, so
// callers presumably guarantee it is at least as long as x.
//
// The kernel has two phases: a vector loop that handles 32 elements per
// iteration (entered only when len(x) >= 32) and a scalar loop for whatever
// remains.
func genLtNumber_F32() {
	const (
		chunk     = 32 // elements consumed by one vector iteration
		predicate = 1  // VCMPPS predicate LT_OS, applied as y < a
	)

	// 16-byte AND mask with 0x01 in the low eight bytes and zero elsewhere. It
	// turns the 0xFF/0x00 bytes left by the pack sequence into 0/1 bools and
	// clears the bytes above the eight results.
	data := GLOBL("dataLtNumberF32", RODATA|NOPTR)
	for off := 0; off < 16; off++ {
		var b uint8
		if off < 8 {
			b = 1
		}
		DATA(off, U8(b))
	}

	TEXT("LtNumber_AVX2_F32", NOSPLIT, "func(x []bool, y []float32, a float32)")
	Pragma("noescape")
	Load(Param("x").Base(), RDI)
	Load(Param("y").Base(), RSI)
	Load(Param("a"), X0)
	Load(Param("x").Len(), RDX)

	// RCX indexes the vector loop, RAX indexes the scalar tail.
	yVec := Mem{Base: RSI}.Idx(RCX, 4)
	xVec := Mem{Base: RDI}.Idx(RCX, 1)
	yTail := Mem{Base: RSI}.Idx(RAX, 4)
	xTail := Mem{Base: RDI}.Idx(RAX, 1)

	// Nothing to do for an empty x; short inputs go straight to the tail.
	TESTQ(RDX, RDX)
	JE(LabelRef("LBB13_7"))
	CMPQ(RDX, Imm(chunk))
	JAE(LabelRef("LBB13_3"))
	XORL(EAX, EAX)
	JMP(LabelRef("LBB13_6"))

	// Vector setup: RAX = len(x) rounded down to a multiple of chunk, Y1 = a
	// in every lane, X2 = byte mask. X0 keeps the scalar a for the tail.
	Label("LBB13_3")
	MOVQ(RDX, RAX)
	ANDQ(I32(-chunk), RAX)
	VBROADCASTSS(X0, Y1)
	XORL(ECX, ECX)
	VMOVDQU(data.Offset(0), X2)

	// Vector loop: four YMM compares against the broadcast value, each
	// narrowed from 32-bit lane masks to eight 0/1 bytes, then merged into
	// one 32-byte store.
	Label("LBB13_4")
	VMOVUPS(yVec, Y3)
	VMOVUPS(yVec.Offset(32), Y4)
	VMOVUPS(yVec.Offset(64), Y5)
	VMOVUPS(yVec.Offset(96), Y6)
	VCMPPS(Imm(predicate), Y1, Y3, Y3)
	VEXTRACTF128(Imm(1), Y3, X7)
	VPACKSSDW(X7, X3, X3)
	VPACKSSWB(X3, X3, X3)
	VPAND(X2, X3, X3)
	VCMPPS(Imm(predicate), Y1, Y4, Y4)
	VEXTRACTF128(Imm(1), Y4, X7)
	VPACKSSDW(X7, X4, X4)
	VPACKSSWB(X4, X4, X4)
	VPAND(X2, X4, X4)
	VCMPPS(Imm(predicate), Y1, Y5, Y5)
	VEXTRACTF128(Imm(1), Y5, X7)
	VPACKSSDW(X7, X5, X5)
	VPACKSSWB(X5, X5, X5)
	VPAND(X2, X5, X5)
	VCMPPS(Imm(predicate), Y1, Y6, Y6)
	VEXTRACTF128(Imm(1), Y6, X7)
	VPACKSSDW(X7, X6, X6)
	VPACKSSWB(X6, X6, X6)
	VPAND(X2, X6, X6)
	VINSERTI128(Imm(1), X6, Y5, Y5)
	VINSERTI128(Imm(1), X4, Y3, Y3)
	VPUNPCKLQDQ(Y5, Y3, Y3)
	VPERMQ(Imm(216), Y3, Y3)
	VMOVDQU(Y3, xVec)
	ADDQ(Imm(chunk), RCX)
	CMPQ(RAX, RCX)
	JNE(LabelRef("LBB13_4"))
	CMPQ(RAX, RDX)
	JE(LabelRef("LBB13_7"))

	// Scalar tail: compare a against y[i]; SETHI stores 1 only when the
	// compare reports "above" (CF == 0 and ZF == 0), i.e. a > y[i].
	Label("LBB13_6")
	VUCOMISS(yTail, X0)
	SETHI(xTail)
	ADDQ(Imm(1), RAX)
	CMPQ(RDX, RAX)
	JNE(LabelRef("LBB13_6"))

	Label("LBB13_7")
	VZEROUPPER()
	RET()
}

// genLteNumber_F64 emits LteNumber_AVX2_F64, which writes y[i] <= a into x[i]
// for every i in [0, len(x)).
//
// Only len(x) is loaded; the emitted code never reads the length of y, so
// callers presumably guarantee it is at least as long as x.
//
// The kernel has two phases: a vector loop that handles 16 elements per
// iteration (entered only when len(x) >= 16) and a scalar loop for whatever
// remains.
func genLteNumber_F64() {
	const (
		chunk     = 16 // elements consumed by one vector iteration
		predicate = 2  // VCMPPD predicate LE_OS, applied as y <= a
	)

	// 16-byte AND mask with 0x01 in the low four bytes and zero elsewhere. It
	// turns the 0xFF/0x00 bytes left by the pack sequence into 0/1 bools and
	// clears the bytes above the four results.
	data := GLOBL("dataLteNumberF64", RODATA|NOPTR)
	for off := 0; off < 16; off++ {
		var b uint8
		if off < 4 {
			b = 1
		}
		DATA(off, U8(b))
	}

	TEXT("LteNumber_AVX2_F64", NOSPLIT, "func(x []bool, y []float64, a float64)")
	Pragma("noescape")
	Load(Param("x").Base(), RDI)
	Load(Param("y").Base(), RSI)
	Load(Param("a"), X0)
	Load(Param("x").Len(), RDX)

	// RCX indexes the vector loop, RAX indexes the scalar tail.
	yVec := Mem{Base: RSI}.Idx(RCX, 8)
	xVec := Mem{Base: RDI}.Idx(RCX, 1)
	yTail := Mem{Base: RSI}.Idx(RAX, 8)
	xTail := Mem{Base: RDI}.Idx(RAX, 1)

	// Nothing to do for an empty x; short inputs go straight to the tail.
	TESTQ(RDX, RDX)
	JE(LabelRef("LBB14_7"))
	CMPQ(RDX, Imm(chunk))
	JAE(LabelRef("LBB14_3"))
	XORL(EAX, EAX)
	JMP(LabelRef("LBB14_6"))

	// Vector setup: RAX = len(x) rounded down to a multiple of chunk, Y1 = a
	// in every lane, X2 = byte mask. X0 keeps the scalar a for the tail.
	Label("LBB14_3")
	MOVQ(RDX, RAX)
	ANDQ(I32(-chunk), RAX)
	VBROADCASTSD(X0, Y1)
	XORL(ECX, ECX)
	VMOVDQU(data.Offset(0), X2)

	// Vector loop: four YMM compares against the broadcast value, each
	// narrowed from 64-bit lane masks to four 0/1 bytes, then merged into one
	// 16-byte store.
	Label("LBB14_4")
	VMOVUPD(yVec, Y3)
	VMOVUPD(yVec.Offset(32), Y4)
	VMOVUPD(yVec.Offset(64), Y5)
	VMOVUPD(yVec.Offset(96), Y6)
	VCMPPD(Imm(predicate), Y1, Y3, Y3)
	VEXTRACTF128(Imm(1), Y3, X7)
	VPACKSSDW(X7, X3, X3)
	VPACKSSDW(X3, X3, X3)
	VPACKSSWB(X3, X3, X3)
	VPAND(X2, X3, X3)
	VCMPPD(Imm(predicate), Y1, Y4, Y4)
	VEXTRACTF128(Imm(1), Y4, X7)
	VPACKSSDW(X7, X4, X4)
	VPACKSSDW(X4, X4, X4)
	VPACKSSWB(X4, X4, X4)
	VPAND(X2, X4, X4)
	VPUNPCKLDQ(X4, X3, X3)
	VCMPPD(Imm(predicate), Y1, Y5, Y4)
	VEXTRACTF128(Imm(1), Y4, X5)
	VPACKSSDW(X5, X4, X4)
	VPACKSSDW(X4, X4, X4)
	VPACKSSWB(X4, X4, X4)
	VPAND(X2, X4, X4)
	VCMPPD(Imm(predicate), Y1, Y6, Y5)
	VEXTRACTF128(Imm(1), Y5, X6)
	VPACKSSDW(X6, X5, X5)
	VPACKSSDW(X5, X5, X5)
	VPACKSSWB(X5, X5, X5)
	VPAND(X2, X5, X5)
	VPBROADCASTD(X5, X5)
	VPBROADCASTD(X4, X4)
	VPUNPCKLDQ(X5, X4, X4)
	VPBLENDD(Imm(12), X4, X3, X3)
	VMOVDQU(X3, xVec)
	ADDQ(Imm(chunk), RCX)
	CMPQ(RAX, RCX)
	JNE(LabelRef("LBB14_4"))
	CMPQ(RAX, RDX)
	JE(LabelRef("LBB14_7"))

	// Scalar tail: compare a against y[i]; SETCC stores 1 only when the
	// compare leaves the carry flag clear, i.e. a >= y[i].
	Label("LBB14_6")
	VUCOMISD(yTail, X0)
	SETCC(xTail)
	ADDQ(Imm(1), RAX)
	CMPQ(RDX, RAX)
	JNE(LabelRef("LBB14_6"))

	Label("LBB14_7")
	VZEROUPPER()
	RET()
}

// genLteNumber_F32 emits the avo definition of LteNumber_AVX2_F32, which
// stores x[i] = (y[i] <= a) as a 0/1 byte for every i in [0, len(x)).
//
// Inputs of 32 or more elements run through an AVX2 loop that handles 32
// elements per iteration; whatever is left (and any shorter input) is handled
// one element at a time. Only len(x) is consulted, so y is presumably at least
// that long — confirm against callers.
func genLteNumber_F32() {
	const (
		vecSetup = "LBB15_3"
		vecLoop  = "LBB15_4"
		tailLoop = "LBB15_6"
		done     = "LBB15_7"
	)

	// 16-byte constant: eight 1 bytes followed by eight 0 bytes. ANDing a
	// packed compare result with it turns 0xFF/0x00 into 1/0.
	oneMask := GLOBL("dataLteNumberF32", RODATA|NOPTR)
	for off := 0; off < 16; off++ {
		var b U8
		if off < 8 {
			b = 1
		}
		DATA(off, b)
	}

	TEXT("LteNumber_AVX2_F32", NOSPLIT, "func(x []bool, y []float32, a float32)")
	Pragma("noescape")
	Load(Param("x").Base(), RDI)
	Load(Param("y").Base(), RSI)
	Load(Param("a"), X0)
	Load(Param("x").Len(), RDX)

	// Empty input: nothing to do. Fewer than 32 elements: scalar loop only.
	TESTQ(RDX, RDX)
	JE(LabelRef(done))
	CMPQ(RDX, Imm(32))
	JAE(LabelRef(vecSetup))
	XORL(EAX, EAX)
	JMP(LabelRef(tailLoop))

	// RAX = len rounded down to a multiple of 32, Y1 = a in every lane,
	// RCX = element index, X2 = 0/1 mask.
	Label(vecSetup)
	MOVQ(RDX, RAX)
	ANDQ(I32(-32), RAX)
	VBROADCASTSS(X0, Y1)
	XORL(ECX, ECX)
	VMOVDQU(oneMask.Offset(0), X2)

	Label(vecLoop)
	src := Mem{Base: RSI}.Idx(RCX, 4)
	dst := Mem{Base: RDI}.Idx(RCX, 1)
	// Each lane is a 256-bit register and its 128-bit low half.
	lanes := [4][2]Op{{Y3, X3}, {Y4, X4}, {Y5, X5}, {Y6, X6}}
	for i, lane := range lanes {
		VMOVUPS(src.Offset(32*i), lane[0])
	}
	for _, lane := range lanes {
		wide, narrow := lane[0], lane[1]
		// Predicate 2: wide = (y <= a) per element, then narrow the eight
		// 32-bit results down to eight 0/1 bytes.
		VCMPPS(Imm(2), Y1, wide, wide)
		VEXTRACTF128(Imm(1), wide, X7)
		VPACKSSDW(X7, narrow, narrow)
		VPACKSSWB(narrow, narrow, narrow)
		VPAND(X2, narrow, narrow)
	}
	// Stitch the four 8-byte results into one 32-byte vector in element order.
	VINSERTI128(Imm(1), X6, Y5, Y5)
	VINSERTI128(Imm(1), X4, Y3, Y3)
	VPUNPCKLQDQ(Y5, Y3, Y3)
	VPERMQ(Imm(216), Y3, Y3)
	VMOVDQU(Y3, dst)
	ADDQ(Imm(32), RCX)
	CMPQ(RAX, RCX)
	JNE(LabelRef(vecLoop))
	// Length was an exact multiple of 32: no tail.
	CMPQ(RAX, RDX)
	JE(LabelRef(done))

	// Scalar loop indexed by RAX. The compare is a against y[i]; SETCC
	// (carry clear) stores 1 when a >= y[i].
	Label(tailLoop)
	elem := Mem{Base: RSI}.Idx(RAX, 4)
	out := Mem{Base: RDI}.Idx(RAX, 1)
	VUCOMISS(elem, X0)
	SETCC(out)
	ADDQ(Imm(1), RAX)
	CMPQ(RDX, RAX)
	JNE(LabelRef(tailLoop))

	Label(done)
	VZEROUPPER()
	RET()
}

// genGtNumber_F64 emits the avo definition of GtNumber_AVX2_F64, which stores
// x[i] = (y[i] > a) as a 0/1 byte for every i in [0, len(x)).
//
// Inputs of 16 or more elements run through an AVX2 loop handling 16 elements
// per iteration; the remainder (and any shorter input) is handled by a scalar
// loop. Only len(x) is consulted, so y is presumably at least that long —
// confirm against callers.
func genGtNumber_F64() {

	// 16-byte constant whose first four bytes are 1. ANDing a packed compare
	// result with it turns 0xFF/0x00 into 1/0.
	data := GLOBL("dataGtNumberF64", RODATA|NOPTR)
	for i := 0; i < 16; i++ {
		var b U8
		if i < 4 {
			b = 1
		}
		DATA(i, b)
	}

	TEXT("GtNumber_AVX2_F64", NOSPLIT, "func(x []bool, y []float64, a float64)")
	Pragma("noescape")
	Load(Param("x").Base(), RDI)
	Load(Param("y").Base(), RSI)
	Load(Param("a"), X0)
	Load(Param("x").Len(), RDX)

	// Empty input: nothing to do. Fewer than 16 elements: scalar loop only.
	TESTQ(RDX, RDX)
	JE(LabelRef("LBB16_7"))
	CMPQ(RDX, Imm(16))
	JAE(LabelRef("LBB16_3"))
	XORL(EAX, EAX)
	JMP(LabelRef("LBB16_6"))

	// RAX = len rounded down to a multiple of 16, Y1 = a in every lane,
	// RCX = element index, X2 = 0/1 mask.
	Label("LBB16_3")
	{
		MOVQ(RDX, RAX)
		ANDQ(I32(-16), RAX)
		VBROADCASTSD(X0, Y1)
		XORL(ECX, ECX)
		VMOVDQU(data.Offset(0), X2)
	}

	// Vector loop. Predicate 1 (LT_OS) computes a < y[i], which is false when
	// either operand is NaN. Each 4-element result is narrowed to four bytes
	// and the four groups are merged into one 16-byte store.
	Label("LBB16_4")
	{
		VCMPPD(Imm(1), Mem{Base: RSI}.Idx(RCX, 8), Y1, Y3)
		VEXTRACTF128(Imm(1), Y3, X4)
		VPACKSSDW(X4, X3, X3)
		VPACKSSDW(X3, X3, X3)
		VPACKSSWB(X3, X3, X3)
		VCMPPD(Imm(1), Mem{Base: RSI}.Idx(RCX, 8).Offset(32), Y1, Y4)
		VPAND(X2, X3, X3)
		VEXTRACTF128(Imm(1), Y4, X5)
		VPACKSSDW(X5, X4, X4)
		VPACKSSDW(X4, X4, X4)
		VPACKSSWB(X4, X4, X4)
		VPAND(X2, X4, X4)
		VCMPPD(Imm(1), Mem{Base: RSI}.Idx(RCX, 8).Offset(64), Y1, Y5)
		VPUNPCKLDQ(X4, X3, X3)
		VEXTRACTF128(Imm(1), Y5, X4)
		VPACKSSDW(X4, X5, X4)
		VPACKSSDW(X4, X4, X4)
		VPACKSSWB(X4, X4, X4)
		VPAND(X2, X4, X4)
		VCMPPD(Imm(1), Mem{Base: RSI}.Idx(RCX, 8).Offset(96), Y1, Y5)
		VEXTRACTF128(Imm(1), Y5, X6)
		VPACKSSDW(X6, X5, X5)
		VPACKSSDW(X5, X5, X5)
		VPACKSSWB(X5, X5, X5)
		VPAND(X2, X5, X5)
		VPBROADCASTD(X5, X5)
		VPBROADCASTD(X4, X4)
		VPUNPCKLDQ(X5, X4, X4)
		VPBLENDD(Imm(12), X4, X3, X3)
		VMOVDQU(X3, Mem{Base: RDI}.Idx(RCX, 1))
		ADDQ(Imm(16), RCX)
		CMPQ(RAX, RCX)
		JNE(LabelRef("LBB16_4"))
		// Length was an exact multiple of 16: no tail.
		CMPQ(RAX, RDX)
		JE(LabelRef("LBB16_7"))
	}

	// Scalar loop indexed by RAX.
	Label("LBB16_6")
	{
		// Compare y[i] against a (rather than a against y[i] with a
		// carry-set test): SETHI needs CF=0 and ZF=0, so it is true only for
		// an ordered y[i] > a. An unordered (NaN) compare sets CF and thus
		// yields false, matching the vector loop. X3 is free as scratch
		// because the vector loop is never re-entered from here.
		VMOVSD(Mem{Base: RSI}.Idx(RAX, 8), X3)
		VUCOMISD(X0, X3)
		SETHI(Mem{Base: RDI}.Idx(RAX, 1))
		ADDQ(Imm(1), RAX)
		CMPQ(RDX, RAX)
		JNE(LabelRef("LBB16_6"))
	}

	Label("LBB16_7")
	{
		VZEROUPPER()
		RET()
	}
}

// genGtNumber_F32 emits the avo definition of GtNumber_AVX2_F32, which stores
// x[i] = (y[i] > a) as a 0/1 byte for every i in [0, len(x)).
//
// Inputs of 32 or more elements run through an AVX2 loop handling 32 elements
// per iteration; the remainder (and any shorter input) is handled by a scalar
// loop. Only len(x) is consulted, so y is presumably at least that long —
// confirm against callers.
func genGtNumber_F32() {

	// 16-byte constant whose first eight bytes are 1. ANDing a packed compare
	// result with it turns 0xFF/0x00 into 1/0.
	data := GLOBL("dataGtNumberF32", RODATA|NOPTR)
	for i := 0; i < 16; i++ {
		var b U8
		if i < 8 {
			b = 1
		}
		DATA(i, b)
	}

	TEXT("GtNumber_AVX2_F32", NOSPLIT, "func(x []bool, y []float32, a float32)")
	Pragma("noescape")
	Load(Param("x").Base(), RDI)
	Load(Param("y").Base(), RSI)
	Load(Param("a"), X0)
	Load(Param("x").Len(), RDX)

	// Empty input: nothing to do. Fewer than 32 elements: scalar loop only.
	TESTQ(RDX, RDX)
	JE(LabelRef("LBB17_7"))
	CMPQ(RDX, Imm(32))
	JAE(LabelRef("LBB17_3"))
	XORL(EAX, EAX)
	JMP(LabelRef("LBB17_6"))

	// RAX = len rounded down to a multiple of 32, Y1 = a in every lane,
	// RCX = element index, X2 = 0/1 mask.
	Label("LBB17_3")
	{
		MOVQ(RDX, RAX)
		ANDQ(I32(-32), RAX)
		VBROADCASTSS(X0, Y1)
		XORL(ECX, ECX)
		VMOVDQU(data.Offset(0), X2)
	}

	// Vector loop. Predicate 1 (LT_OS) computes a < y[i], which is false when
	// either operand is NaN. Each 8-element result is narrowed to eight bytes
	// and the four groups are merged into one 32-byte store.
	Label("LBB17_4")
	{
		VCMPPS(Imm(1), Mem{Base: RSI}.Idx(RCX, 4), Y1, Y3)
		VEXTRACTF128(Imm(1), Y3, X4)
		VPACKSSDW(X4, X3, X3)
		VPACKSSWB(X3, X3, X3)
		VCMPPS(Imm(1), Mem{Base: RSI}.Idx(RCX, 4).Offset(32), Y1, Y4)
		VPAND(X2, X3, X3)
		VEXTRACTF128(Imm(1), Y4, X5)
		VPACKSSDW(X5, X4, X4)
		VPACKSSWB(X4, X4, X4)
		VPAND(X2, X4, X4)
		VCMPPS(Imm(1), Mem{Base: RSI}.Idx(RCX, 4).Offset(64), Y1, Y5)
		VEXTRACTF128(Imm(1), Y5, X6)
		VPACKSSDW(X6, X5, X5)
		VPACKSSWB(X5, X5, X5)
		VCMPPS(Imm(1), Mem{Base: RSI}.Idx(RCX, 4).Offset(96), Y1, Y6)
		VPAND(X2, X5, X5)
		VEXTRACTF128(Imm(1), Y6, X7)
		VPACKSSDW(X7, X6, X6)
		VPACKSSWB(X6, X6, X6)
		VPAND(X2, X6, X6)
		VINSERTI128(Imm(1), X6, Y5, Y5)
		VINSERTI128(Imm(1), X4, Y3, Y3)
		VPUNPCKLQDQ(Y5, Y3, Y3)
		VPERMQ(Imm(216), Y3, Y3)
		VMOVDQU(Y3, Mem{Base: RDI}.Idx(RCX, 1))
		ADDQ(Imm(32), RCX)
		CMPQ(RAX, RCX)
		JNE(LabelRef("LBB17_4"))
		// Length was an exact multiple of 32: no tail.
		CMPQ(RAX, RDX)
		JE(LabelRef("LBB17_7"))
	}

	// Scalar loop indexed by RAX.
	Label("LBB17_6")
	{
		// Compare y[i] against a (rather than a against y[i] with a
		// carry-set test): SETHI needs CF=0 and ZF=0, so it is true only for
		// an ordered y[i] > a. An unordered (NaN) compare sets CF and thus
		// yields false, matching the vector loop. X3 is free as scratch
		// because the vector loop is never re-entered from here.
		VMOVSS(Mem{Base: RSI}.Idx(RAX, 4), X3)
		VUCOMISS(X0, X3)
		SETHI(Mem{Base: RDI}.Idx(RAX, 1))
		ADDQ(Imm(1), RAX)
		CMPQ(RDX, RAX)
		JNE(LabelRef("LBB17_6"))
	}

	Label("LBB17_7")
	{
		VZEROUPPER()
		RET()
	}
}

// genGteNumber_F64 emits the avo definition of GteNumber_AVX2_F64, which
// stores x[i] = (y[i] >= a) as a 0/1 byte for every i in [0, len(x)).
//
// Inputs of 16 or more elements run through an AVX2 loop handling 16 elements
// per iteration; the remainder (and any shorter input) is handled by a scalar
// loop. Only len(x) is consulted, so y is presumably at least that long —
// confirm against callers.
func genGteNumber_F64() {

	// 16-byte constant whose first four bytes are 1. ANDing a packed compare
	// result with it turns 0xFF/0x00 into 1/0.
	data := GLOBL("dataGteNumberF64", RODATA|NOPTR)
	for i := 0; i < 16; i++ {
		var b U8
		if i < 4 {
			b = 1
		}
		DATA(i, b)
	}

	TEXT("GteNumber_AVX2_F64", NOSPLIT, "func(x []bool, y []float64, a float64)")
	Pragma("noescape")
	Load(Param("x").Base(), RDI)
	Load(Param("y").Base(), RSI)
	Load(Param("a"), X0)
	Load(Param("x").Len(), RDX)

	// Empty input: nothing to do. Fewer than 16 elements: scalar loop only.
	TESTQ(RDX, RDX)
	JE(LabelRef("LBB18_7"))
	CMPQ(RDX, Imm(16))
	JAE(LabelRef("LBB18_3"))
	XORL(EAX, EAX)
	JMP(LabelRef("LBB18_6"))

	// RAX = len rounded down to a multiple of 16, Y1 = a in every lane,
	// RCX = element index, X2 = 0/1 mask.
	Label("LBB18_3")
	{
		MOVQ(RDX, RAX)
		ANDQ(I32(-16), RAX)
		VBROADCASTSD(X0, Y1)
		XORL(ECX, ECX)
		VMOVDQU(data.Offset(0), X2)
	}

	// Vector loop. Predicate 2 (LE_OS) computes a <= y[i], which is false
	// when either operand is NaN. Each 4-element result is narrowed to four
	// bytes and the four groups are merged into one 16-byte store.
	Label("LBB18_4")
	{
		VCMPPD(Imm(2), Mem{Base: RSI}.Idx(RCX, 8), Y1, Y3)
		VEXTRACTF128(Imm(1), Y3, X4)
		VPACKSSDW(X4, X3, X3)
		VPACKSSDW(X3, X3, X3)
		VPACKSSWB(X3, X3, X3)
		VCMPPD(Imm(2), Mem{Base: RSI}.Idx(RCX, 8).Offset(32), Y1, Y4)
		VPAND(X2, X3, X3)
		VEXTRACTF128(Imm(1), Y4, X5)
		VPACKSSDW(X5, X4, X4)
		VPACKSSDW(X4, X4, X4)
		VPACKSSWB(X4, X4, X4)
		VPAND(X2, X4, X4)
		VCMPPD(Imm(2), Mem{Base: RSI}.Idx(RCX, 8).Offset(64), Y1, Y5)
		VPUNPCKLDQ(X4, X3, X3)
		VEXTRACTF128(Imm(1), Y5, X4)
		VPACKSSDW(X4, X5, X4)
		VPACKSSDW(X4, X4, X4)
		VPACKSSWB(X4, X4, X4)
		VPAND(X2, X4, X4)
		VCMPPD(Imm(2), Mem{Base: RSI}.Idx(RCX, 8).Offset(96), Y1, Y5)
		VEXTRACTF128(Imm(1), Y5, X6)
		VPACKSSDW(X6, X5, X5)
		VPACKSSDW(X5, X5, X5)
		VPACKSSWB(X5, X5, X5)
		VPAND(X2, X5, X5)
		VPBROADCASTD(X5, X5)
		VPBROADCASTD(X4, X4)
		VPUNPCKLDQ(X5, X4, X4)
		VPBLENDD(Imm(12), X4, X3, X3)
		VMOVDQU(X3, Mem{Base: RDI}.Idx(RCX, 1))
		ADDQ(Imm(16), RCX)
		CMPQ(RAX, RCX)
		JNE(LabelRef("LBB18_4"))
		// Length was an exact multiple of 16: no tail.
		CMPQ(RAX, RDX)
		JE(LabelRef("LBB18_7"))
	}

	// Scalar loop indexed by RAX.
	Label("LBB18_6")
	{
		// Compare y[i] against a (rather than a against y[i] with a
		// below-or-equal test): SETCC needs CF=0, so it is true only for an
		// ordered y[i] >= a. An unordered (NaN) compare sets CF and thus
		// yields false, matching the vector loop. X3 is free as scratch
		// because the vector loop is never re-entered from here.
		VMOVSD(Mem{Base: RSI}.Idx(RAX, 8), X3)
		VUCOMISD(X0, X3)
		SETCC(Mem{Base: RDI}.Idx(RAX, 1))
		ADDQ(Imm(1), RAX)
		CMPQ(RDX, RAX)
		JNE(LabelRef("LBB18_6"))
	}

	Label("LBB18_7")
	{
		VZEROUPPER()
		RET()
	}
}

// genGteNumber_F32 emits the avo definition of GteNumber_AVX2_F32, which
// stores x[i] = (y[i] >= a) as a 0/1 byte for every i in [0, len(x)).
//
// Inputs of 32 or more elements run through an AVX2 loop handling 32 elements
// per iteration; the remainder (and any shorter input) is handled by a scalar
// loop. Only len(x) is consulted, so y is presumably at least that long —
// confirm against callers.
func genGteNumber_F32() {

	// 16-byte constant whose first eight bytes are 1. ANDing a packed compare
	// result with it turns 0xFF/0x00 into 1/0.
	data := GLOBL("dataGteNumberF32", RODATA|NOPTR)
	for i := 0; i < 16; i++ {
		var b U8
		if i < 8 {
			b = 1
		}
		DATA(i, b)
	}

	TEXT("GteNumber_AVX2_F32", NOSPLIT, "func(x []bool, y []float32, a float32)")
	Pragma("noescape")
	Load(Param("x").Base(), RDI)
	Load(Param("y").Base(), RSI)
	Load(Param("a"), X0)
	Load(Param("x").Len(), RDX)

	// Empty input: nothing to do. Fewer than 32 elements: scalar loop only.
	TESTQ(RDX, RDX)
	JE(LabelRef("LBB19_7"))
	CMPQ(RDX, Imm(32))
	JAE(LabelRef("LBB19_3"))
	XORL(EAX, EAX)
	JMP(LabelRef("LBB19_6"))

	// RAX = len rounded down to a multiple of 32, Y1 = a in every lane,
	// RCX = element index, X2 = 0/1 mask.
	Label("LBB19_3")
	{
		MOVQ(RDX, RAX)
		ANDQ(I32(-32), RAX)
		VBROADCASTSS(X0, Y1)
		XORL(ECX, ECX)
		VMOVDQU(data.Offset(0), X2)
	}

	// Vector loop. Predicate 2 (LE_OS) computes a <= y[i], which is false
	// when either operand is NaN. Each 8-element result is narrowed to eight
	// bytes and the four groups are merged into one 32-byte store.
	Label("LBB19_4")
	{
		VCMPPS(Imm(2), Mem{Base: RSI}.Idx(RCX, 4), Y1, Y3)
		VEXTRACTF128(Imm(1), Y3, X4)
		VPACKSSDW(X4, X3, X3)
		VPACKSSWB(X3, X3, X3)
		VCMPPS(Imm(2), Mem{Base: RSI}.Idx(RCX, 4).Offset(32), Y1, Y4)
		VPAND(X2, X3, X3)
		VEXTRACTF128(Imm(1), Y4, X5)
		VPACKSSDW(X5, X4, X4)
		VPACKSSWB(X4, X4, X4)
		VPAND(X2, X4, X4)
		VCMPPS(Imm(2), Mem{Base: RSI}.Idx(RCX, 4).Offset(64), Y1, Y5)
		VEXTRACTF128(Imm(1), Y5, X6)
		VPACKSSDW(X6, X5, X5)
		VPACKSSWB(X5, X5, X5)
		VCMPPS(Imm(2), Mem{Base: RSI}.Idx(RCX, 4).Offset(96), Y1, Y6)
		VPAND(X2, X5, X5)
		VEXTRACTF128(Imm(1), Y6, X7)
		VPACKSSDW(X7, X6, X6)
		VPACKSSWB(X6, X6, X6)
		VPAND(X2, X6, X6)
		VINSERTI128(Imm(1), X6, Y5, Y5)
		VINSERTI128(Imm(1), X4, Y3, Y3)
		VPUNPCKLQDQ(Y5, Y3, Y3)
		VPERMQ(Imm(216), Y3, Y3)
		VMOVDQU(Y3, Mem{Base: RDI}.Idx(RCX, 1))
		ADDQ(Imm(32), RCX)
		CMPQ(RAX, RCX)
		JNE(LabelRef("LBB19_4"))
		// Length was an exact multiple of 32: no tail.
		CMPQ(RAX, RDX)
		JE(LabelRef("LBB19_7"))
	}

	// Scalar loop indexed by RAX.
	Label("LBB19_6")
	{
		// Compare y[i] against a (rather than a against y[i] with a
		// below-or-equal test): SETCC needs CF=0, so it is true only for an
		// ordered y[i] >= a. An unordered (NaN) compare sets CF and thus
		// yields false, matching the vector loop. X3 is free as scratch
		// because the vector loop is never re-entered from here.
		VMOVSS(Mem{Base: RSI}.Idx(RAX, 4), X3)
		VUCOMISS(X0, X3)
		SETCC(Mem{Base: RDI}.Idx(RAX, 1))
		ADDQ(Imm(1), RAX)
		CMPQ(RDX, RAX)
		JNE(LabelRef("LBB19_6"))
	}

	Label("LBB19_7")
	{
		VZEROUPPER()
		RET()
	}
}

// genEqNumber_F64 emits the avo definition of EqNumber_AVX2_F64, which stores
// x[i] = (y[i] == a) as a 0/1 byte for every i in [0, len(x)).
//
// Inputs of 16 or more elements run through an AVX2 loop handling 16 elements
// per iteration; the remainder (and any shorter input) is handled by a scalar
// loop. Only len(x) is consulted, so y is presumably at least that long —
// confirm against callers.
func genEqNumber_F64() {

	// 16-byte constant whose first four bytes are 1. ANDing a packed compare
	// result with it turns 0xFF/0x00 into 1/0.
	data := GLOBL("dataEqNumberF64", RODATA|NOPTR)
	for i := 0; i < 16; i++ {
		var b U8
		if i < 4 {
			b = 1
		}
		DATA(i, b)
	}

	TEXT("EqNumber_AVX2_F64", NOSPLIT, "func(x []bool, y []float64, a float64)")
	Pragma("noescape")
	Load(Param("x").Base(), RDI)
	Load(Param("y").Base(), RSI)
	Load(Param("a"), X0)
	Load(Param("x").Len(), RDX)

	// Empty input: nothing to do. Fewer than 16 elements: scalar loop only.
	TESTQ(RDX, RDX)
	JE(LabelRef("LBB20_7"))
	CMPQ(RDX, Imm(16))
	JAE(LabelRef("LBB20_3"))
	XORL(EAX, EAX)
	JMP(LabelRef("LBB20_6"))

	// RAX = len rounded down to a multiple of 16, Y1 = a in every lane,
	// RCX = element index, X2 = 0/1 mask.
	Label("LBB20_3")
	{
		MOVQ(RDX, RAX)
		ANDQ(I32(-16), RAX)
		VBROADCASTSD(X0, Y1)
		XORL(ECX, ECX)
		VMOVDQU(data.Offset(0), X2)
	}

	// Vector loop. Predicate 0 (EQ_OQ) is false when either operand is NaN.
	// Each 4-element result is narrowed to four bytes and the four groups are
	// merged into one 16-byte store.
	Label("LBB20_4")
	{
		VCMPPD(Imm(0), Mem{Base: RSI}.Idx(RCX, 8), Y1, Y3)
		VEXTRACTF128(Imm(1), Y3, X4)
		VPACKSSDW(X4, X3, X3)
		VPACKSSDW(X3, X3, X3)
		VPACKSSWB(X3, X3, X3)
		VCMPPD(Imm(0), Mem{Base: RSI}.Idx(RCX, 8).Offset(32), Y1, Y4)
		VPAND(X2, X3, X3)
		VEXTRACTF128(Imm(1), Y4, X5)
		VPACKSSDW(X5, X4, X4)
		VPACKSSDW(X4, X4, X4)
		VPACKSSWB(X4, X4, X4)
		VPAND(X2, X4, X4)
		VCMPPD(Imm(0), Mem{Base: RSI}.Idx(RCX, 8).Offset(64), Y1, Y5)
		VPUNPCKLDQ(X4, X3, X3)
		VEXTRACTF128(Imm(1), Y5, X4)
		VPACKSSDW(X4, X5, X4)
		VPACKSSDW(X4, X4, X4)
		VPACKSSWB(X4, X4, X4)
		VPAND(X2, X4, X4)
		VCMPPD(Imm(0), Mem{Base: RSI}.Idx(RCX, 8).Offset(96), Y1, Y5)
		VEXTRACTF128(Imm(1), Y5, X6)
		VPACKSSDW(X6, X5, X5)
		VPACKSSDW(X5, X5, X5)
		VPACKSSWB(X5, X5, X5)
		VPAND(X2, X5, X5)
		VPBROADCASTD(X5, X5)
		VPBROADCASTD(X4, X4)
		VPUNPCKLDQ(X5, X4, X4)
		VPBLENDD(Imm(12), X4, X3, X3)
		VMOVDQU(X3, Mem{Base: RDI}.Idx(RCX, 1))
		ADDQ(Imm(16), RCX)
		CMPQ(RAX, RCX)
		JNE(LabelRef("LBB20_4"))
		// Length was an exact multiple of 16: no tail.
		CMPQ(RAX, RDX)
		JE(LabelRef("LBB20_7"))
	}

	// Scalar loop indexed by RAX.
	Label("LBB20_6")
	{
		// The compare sets ZF for "equal" and also for "unordered"; only the
		// unordered case additionally sets PF. Requiring ZF=1 and PF=0 makes
		// a NaN operand compare unequal, matching the vector loop. R8/R9 are
		// not used anywhere else in this function.
		VUCOMISD(Mem{Base: RSI}.Idx(RAX, 8), X0)
		SETEQ(R8B)
		SETPC(R9B)
		ANDB(R9B, R8B)
		MOVB(R8B, Mem{Base: RDI}.Idx(RAX, 1))
		ADDQ(Imm(1), RAX)
		CMPQ(RDX, RAX)
		JNE(LabelRef("LBB20_6"))
	}

	Label("LBB20_7")
	{
		VZEROUPPER()
		RET()
	}
}

// genEqNumber_F32 emits the avo definition of EqNumber_AVX2_F32, which stores
// x[i] = (y[i] == a) as a 0/1 byte for every i in [0, len(x)).
//
// Inputs of 32 or more elements run through an AVX2 loop handling 32 elements
// per iteration; the remainder (and any shorter input) is handled by a scalar
// loop. Only len(x) is consulted, so y is presumably at least that long —
// confirm against callers.
func genEqNumber_F32() {

	// 16-byte constant whose first eight bytes are 1. ANDing a packed compare
	// result with it turns 0xFF/0x00 into 1/0.
	data := GLOBL("dataEqNumberF32", RODATA|NOPTR)
	for i := 0; i < 16; i++ {
		var b U8
		if i < 8 {
			b = 1
		}
		DATA(i, b)
	}

	TEXT("EqNumber_AVX2_F32", NOSPLIT, "func(x []bool, y []float32, a float32)")
	Pragma("noescape")
	Load(Param("x").Base(), RDI)
	Load(Param("y").Base(), RSI)
	Load(Param("a"), X0)
	Load(Param("x").Len(), RDX)

	// Empty input: nothing to do. Fewer than 32 elements: scalar loop only.
	TESTQ(RDX, RDX)
	JE(LabelRef("LBB21_7"))
	CMPQ(RDX, Imm(32))
	JAE(LabelRef("LBB21_3"))
	XORL(EAX, EAX)
	JMP(LabelRef("LBB21_6"))

	// RAX = len rounded down to a multiple of 32, Y1 = a in every lane,
	// RCX = element index, X2 = 0/1 mask.
	Label("LBB21_3")
	{
		MOVQ(RDX, RAX)
		ANDQ(I32(-32), RAX)
		VBROADCASTSS(X0, Y1)
		XORL(ECX, ECX)
		VMOVDQU(data.Offset(0), X2)
	}

	// Vector loop. Predicate 0 (EQ_OQ) is false when either operand is NaN.
	// Each 8-element result is narrowed to eight bytes and the four groups
	// are merged into one 32-byte store.
	Label("LBB21_4")
	{
		VCMPPS(Imm(0), Mem{Base: RSI}.Idx(RCX, 4), Y1, Y3)
		VEXTRACTF128(Imm(1), Y3, X4)
		VPACKSSDW(X4, X3, X3)
		VPACKSSWB(X3, X3, X3)
		VCMPPS(Imm(0), Mem{Base: RSI}.Idx(RCX, 4).Offset(32), Y1, Y4)
		VPAND(X2, X3, X3)
		VEXTRACTF128(Imm(1), Y4, X5)
		VPACKSSDW(X5, X4, X4)
		VPACKSSWB(X4, X4, X4)
		VPAND(X2, X4, X4)
		VCMPPS(Imm(0), Mem{Base: RSI}.Idx(RCX, 4).Offset(64), Y1, Y5)
		VEXTRACTF128(Imm(1), Y5, X6)
		VPACKSSDW(X6, X5, X5)
		VPACKSSWB(X5, X5, X5)
		VCMPPS(Imm(0), Mem{Base: RSI}.Idx(RCX, 4).Offset(96), Y1, Y6)
		VPAND(X2, X5, X5)
		VEXTRACTF128(Imm(1), Y6, X7)
		VPACKSSDW(X7, X6, X6)
		VPACKSSWB(X6, X6, X6)
		VPAND(X2, X6, X6)
		VINSERTI128(Imm(1), X6, Y5, Y5)
		VINSERTI128(Imm(1), X4, Y3, Y3)
		VPUNPCKLQDQ(Y5, Y3, Y3)
		VPERMQ(Imm(216), Y3, Y3)
		VMOVDQU(Y3, Mem{Base: RDI}.Idx(RCX, 1))
		ADDQ(Imm(32), RCX)
		CMPQ(RAX, RCX)
		JNE(LabelRef("LBB21_4"))
		// Length was an exact multiple of 32: no tail.
		CMPQ(RAX, RDX)
		JE(LabelRef("LBB21_7"))
	}

	// Scalar loop indexed by RAX.
	Label("LBB21_6")
	{
		// The compare sets ZF for "equal" and also for "unordered"; only the
		// unordered case additionally sets PF. Requiring ZF=1 and PF=0 makes
		// a NaN operand compare unequal, matching the vector loop. R8/R9 are
		// not used anywhere else in this function.
		VUCOMISS(Mem{Base: RSI}.Idx(RAX, 4), X0)
		SETEQ(R8B)
		SETPC(R9B)
		ANDB(R9B, R8B)
		MOVB(R8B, Mem{Base: RDI}.Idx(RAX, 1))
		ADDQ(Imm(1), RAX)
		CMPQ(RDX, RAX)
		JNE(LabelRef("LBB21_6"))
	}

	Label("LBB21_7")
	{
		VZEROUPPER()
		RET()
	}
}

// genNeqNumber_F64 emits the avo definition of NeqNumber_AVX2_F64, which
// stores x[i] = (y[i] != a) as a 0/1 byte for every i in [0, len(x)).
//
// Inputs of 16 or more elements run through an AVX2 loop handling 16 elements
// per iteration; the remainder (and any shorter input) is handled by a scalar
// loop. Only len(x) is consulted, so y is presumably at least that long —
// confirm against callers.
func genNeqNumber_F64() {

	// 16-byte constant whose first four bytes are 1. ANDing a packed compare
	// result with it turns 0xFF/0x00 into 1/0.
	data := GLOBL("dataNeqNumberF64", RODATA|NOPTR)
	for i := 0; i < 16; i++ {
		var b U8
		if i < 4 {
			b = 1
		}
		DATA(i, b)
	}

	TEXT("NeqNumber_AVX2_F64", NOSPLIT, "func(x []bool, y []float64, a float64)")
	Pragma("noescape")
	Load(Param("x").Base(), RDI)
	Load(Param("y").Base(), RSI)
	Load(Param("a"), X0)
	Load(Param("x").Len(), RDX)

	// Empty input: nothing to do. Fewer than 16 elements: scalar loop only.
	TESTQ(RDX, RDX)
	JE(LabelRef("LBB22_7"))
	CMPQ(RDX, Imm(16))
	JAE(LabelRef("LBB22_3"))
	XORL(EAX, EAX)
	JMP(LabelRef("LBB22_6"))

	// RAX = len rounded down to a multiple of 16, Y1 = a in every lane,
	// RCX = element index, X2 = 0/1 mask.
	Label("LBB22_3")
	{
		MOVQ(RDX, RAX)
		ANDQ(I32(-16), RAX)
		VBROADCASTSD(X0, Y1)
		XORL(ECX, ECX)
		VMOVDQU(data.Offset(0), X2)
	}

	// Vector loop. Predicate 4 (NEQ_UQ) is true when the operands differ or
	// either one is NaN. Each 4-element result is narrowed to four bytes and
	// the four groups are merged into one 16-byte store.
	Label("LBB22_4")
	{
		VCMPPD(Imm(4), Mem{Base: RSI}.Idx(RCX, 8), Y1, Y3)
		VEXTRACTF128(Imm(1), Y3, X4)
		VPACKSSDW(X4, X3, X3)
		VPACKSSDW(X3, X3, X3)
		VPACKSSWB(X3, X3, X3)
		VCMPPD(Imm(4), Mem{Base: RSI}.Idx(RCX, 8).Offset(32), Y1, Y4)
		VPAND(X2, X3, X3)
		VEXTRACTF128(Imm(1), Y4, X5)
		VPACKSSDW(X5, X4, X4)
		VPACKSSDW(X4, X4, X4)
		VPACKSSWB(X4, X4, X4)
		VPAND(X2, X4, X4)
		VCMPPD(Imm(4), Mem{Base: RSI}.Idx(RCX, 8).Offset(64), Y1, Y5)
		VPUNPCKLDQ(X4, X3, X3)
		VEXTRACTF128(Imm(1), Y5, X4)
		VPACKSSDW(X4, X5, X4)
		VPACKSSDW(X4, X4, X4)
		VPACKSSWB(X4, X4, X4)
		VPAND(X2, X4, X4)
		VCMPPD(Imm(4), Mem{Base: RSI}.Idx(RCX, 8).Offset(96), Y1, Y5)
		VEXTRACTF128(Imm(1), Y5, X6)
		VPACKSSDW(X6, X5, X5)
		VPACKSSDW(X5, X5, X5)
		VPACKSSWB(X5, X5, X5)
		VPAND(X2, X5, X5)
		VPBROADCASTD(X5, X5)
		VPBROADCASTD(X4, X4)
		VPUNPCKLDQ(X5, X4, X4)
		VPBLENDD(Imm(12), X4, X3, X3)
		VMOVDQU(X3, Mem{Base: RDI}.Idx(RCX, 1))
		ADDQ(Imm(16), RCX)
		CMPQ(RAX, RCX)
		JNE(LabelRef("LBB22_4"))
		// Length was an exact multiple of 16: no tail.
		CMPQ(RAX, RDX)
		JE(LabelRef("LBB22_7"))
	}

	// Scalar loop indexed by RAX.
	Label("LBB22_6")
	{
		// The compare sets ZF for "equal" and also for "unordered", so ZF=0
		// alone would report a NaN operand as equal. The unordered case is
		// the only one that sets PF; OR-ing it in makes NaN compare
		// not-equal, matching the vector loop. R8/R9 are not used anywhere
		// else in this function.
		VUCOMISD(Mem{Base: RSI}.Idx(RAX, 8), X0)
		SETNE(R8B)
		SETPS(R9B)
		ORB(R9B, R8B)
		MOVB(R8B, Mem{Base: RDI}.Idx(RAX, 1))
		ADDQ(Imm(1), RAX)
		CMPQ(RDX, RAX)
		JNE(LabelRef("LBB22_6"))
	}

	Label("LBB22_7")
	{
		VZEROUPPER()
		RET()
	}
}

// genNeqNumber_F32 emits the avo definition of NeqNumber_AVX2_F32, which
// stores x[i] = (y[i] != a) as a 0/1 byte for every i in [0, len(x)).
//
// Inputs of 32 or more elements run through an AVX2 loop handling 32 elements
// per iteration; the remainder (and any shorter input) is handled by a scalar
// loop. Only len(x) is consulted, so y is presumably at least that long —
// confirm against callers.
func genNeqNumber_F32() {

	// 16-byte constant whose first eight bytes are 1. ANDing a packed compare
	// result with it turns 0xFF/0x00 into 1/0.
	data := GLOBL("dataNeqNumberF32", RODATA|NOPTR)
	for i := 0; i < 16; i++ {
		var b U8
		if i < 8 {
			b = 1
		}
		DATA(i, b)
	}

	TEXT("NeqNumber_AVX2_F32", NOSPLIT, "func(x []bool, y []float32, a float32)")
	Pragma("noescape")
	Load(Param("x").Base(), RDI)
	Load(Param("y").Base(), RSI)
	Load(Param("a"), X0)
	Load(Param("x").Len(), RDX)

	// Empty input: nothing to do. Fewer than 32 elements: scalar loop only.
	TESTQ(RDX, RDX)
	JE(LabelRef("LBB23_7"))
	CMPQ(RDX, Imm(32))
	JAE(LabelRef("LBB23_3"))
	XORL(EAX, EAX)
	JMP(LabelRef("LBB23_6"))

	// RAX = len rounded down to a multiple of 32, Y1 = a in every lane,
	// RCX = element index, X2 = 0/1 mask.
	Label("LBB23_3")
	{
		MOVQ(RDX, RAX)
		ANDQ(I32(-32), RAX)
		VBROADCASTSS(X0, Y1)
		XORL(ECX, ECX)
		VMOVDQU(data.Offset(0), X2)
	}

	// Vector loop. Predicate 4 (NEQ_UQ) is true when the operands differ or
	// either one is NaN. Each 8-element result is narrowed to eight bytes and
	// the four groups are merged into one 32-byte store.
	Label("LBB23_4")
	{
		VCMPPS(Imm(4), Mem{Base: RSI}.Idx(RCX, 4), Y1, Y3)
		VEXTRACTF128(Imm(1), Y3, X4)
		VPACKSSDW(X4, X3, X3)
		VPACKSSWB(X3, X3, X3)
		VCMPPS(Imm(4), Mem{Base: RSI}.Idx(RCX, 4).Offset(32), Y1, Y4)
		VPAND(X2, X3, X3)
		VEXTRACTF128(Imm(1), Y4, X5)
		VPACKSSDW(X5, X4, X4)
		VPACKSSWB(X4, X4, X4)
		VPAND(X2, X4, X4)
		VCMPPS(Imm(4), Mem{Base: RSI}.Idx(RCX, 4).Offset(64), Y1, Y5)
		VEXTRACTF128(Imm(1), Y5, X6)
		VPACKSSDW(X6, X5, X5)
		VPACKSSWB(X5, X5, X5)
		VCMPPS(Imm(4), Mem{Base: RSI}.Idx(RCX, 4).Offset(96), Y1, Y6)
		VPAND(X2, X5, X5)
		VEXTRACTF128(Imm(1), Y6, X7)
		VPACKSSDW(X7, X6, X6)
		VPACKSSWB(X6, X6, X6)
		VPAND(X2, X6, X6)
		VINSERTI128(Imm(1), X6, Y5, Y5)
		VINSERTI128(Imm(1), X4, Y3, Y3)
		VPUNPCKLQDQ(Y5, Y3, Y3)
		VPERMQ(Imm(216), Y3, Y3)
		VMOVDQU(Y3, Mem{Base: RDI}.Idx(RCX, 1))
		ADDQ(Imm(32), RCX)
		CMPQ(RAX, RCX)
		JNE(LabelRef("LBB23_4"))
		// Length was an exact multiple of 32: no tail.
		CMPQ(RAX, RDX)
		JE(LabelRef("LBB23_7"))
	}

	// Scalar loop indexed by RAX.
	Label("LBB23_6")
	{
		// The compare sets ZF for "equal" and also for "unordered", so ZF=0
		// alone would report a NaN operand as equal. The unordered case is
		// the only one that sets PF; OR-ing it in makes NaN compare
		// not-equal, matching the vector loop. R8/R9 are not used anywhere
		// else in this function.
		VUCOMISS(Mem{Base: RSI}.Idx(RAX, 4), X0)
		SETNE(R8B)
		SETPS(R9B)
		ORB(R9B, R8B)
		MOVB(R8B, Mem{Base: RDI}.Idx(RAX, 1))
		ADDQ(Imm(1), RAX)
		CMPQ(RDX, RAX)
		JNE(LabelRef("LBB23_6"))
	}

	Label("LBB23_7")
	{
		VZEROUPPER()
		RET()
	}
}
